Initial softfloat port from Bochs to 86Box, currently selectable only on Qt.

TC1995
2023-04-29 18:56:57 +02:00
parent 071c05e65f
commit 7a53e1de45
44 changed files with 16934 additions and 115 deletions


@@ -0,0 +1,17 @@
#
# 86Box A hypervisor and IBM PC system emulator that specializes in
# running old operating systems and software designed for IBM
# PC systems and compatibles from 1981 through fairly recent
# system designs based on the PCI bus.
#
# This file is part of the 86Box distribution.
#
# CMake build script.
#
# Authors: David Hrdlička, <hrdlickadavid@outlook.com>
#
# Copyright 2020-2021 David Hrdlička.
#
add_library(softfloat OBJECT f2xm1.cc fpatan.cc fprem.cc fsincos.cc fyl2x.cc poly.cc softfloat.cc softfloat16.cc
softfloat-muladd.cc softfloat-round-pack.cc softfloat-specialize.cc softfloatx80.cc)


@@ -0,0 +1,46 @@
#include <stdint.h>
typedef int8_t flag;
typedef uint8_t uint8;
typedef int8_t int8;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
/*----------------------------------------------------------------------------
| Each of the following `typedef's defines a type that holds integers
| of _exactly_ the number of bits specified. For instance, for most
| implementations of C, `bits16' and `sbits16' should be `typedef'ed to
| `unsigned short int' and `signed short int' (or `short int'), respectively.
*----------------------------------------------------------------------------*/
typedef uint8_t bits8;
typedef int8_t sbits8;
typedef uint16_t bits16;
typedef int16_t sbits16;
typedef uint32_t bits32;
typedef int32_t sbits32;
typedef uint64_t bits64;
typedef int64_t sbits64;
typedef uint8_t Bit8u;
typedef int8_t Bit8s;
typedef uint16_t Bit16u;
typedef int16_t Bit16s;
typedef uint32_t Bit32u;
typedef int32_t Bit32s;
typedef uint64_t Bit64u;
typedef int64_t Bit64s;
/*----------------------------------------------------------------------------
| The `LIT64' macro takes as its argument a textual integer literal and
| if necessary ``marks'' the literal as having a 64-bit integer type.
| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
| appended with the letters `LL' standing for `long long', which is `gcc's
| name for the 64-bit integer type. Some compilers may allow `LIT64' to be
| defined as the identity macro: `#define LIT64( a ) a'.
*----------------------------------------------------------------------------*/
#define BX_CONST64(a) a##LL
#define BX_CPP_INLINE static __inline
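For reference, a minimal standalone sketch (not part of the commit; the macro name MY_CONST64 below is hypothetical) of the token-pasting pattern that BX_CONST64 uses to force 64-bit literals:

#include <stdint.h>
#include <stdio.h>

/* Same token-pasting trick as BX_CONST64: append LL so the literal gets a
 * 64-bit integer type even on compilers where plain int is 32 bits. */
#define MY_CONST64(a) a##LL

int main(void)
{
    int64_t frac = MY_CONST64(0x123456789abcdef);
    printf("%llx\n", (unsigned long long) frac);
    return 0;
}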

182 src/cpu/softfloat/f2xm1.cc Normal file

@@ -0,0 +1,182 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define FLOAT128
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
static const floatx80 floatx80_negone = packFloatx80(1, 0x3fff, BX_CONST64(0x8000000000000000));
static const floatx80 floatx80_neghalf = packFloatx80(1, 0x3ffe, BX_CONST64(0x8000000000000000));
static const float128 float128_ln2 =
packFloat128(BX_CONST64(0x3ffe62e42fefa39e), BX_CONST64(0xf35793c7673007e6));
#ifdef BETTER_THAN_PENTIUM
#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab)
#define LN2_SIG_LO BX_CONST64(0xc9e3b39800000000) /* 96 bit precision */
#else
#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab)
#define LN2_SIG_LO BX_CONST64(0xc000000000000000) /* 67-bit precision */
#endif
#define EXP_ARR_SIZE 15
static float128 exp_arr[EXP_ARR_SIZE] =
{
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /* 2 */
PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /* 3 */
PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0) /* 15 */
};
extern float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
/* required -1 < x < 1 */
static float128 poly_exp(float128 x, struct float_status_t *status)
{
/*
//  e^x - 1 ~ x + x^2/2! + x^3/3! + x^4/4! + x^5/5! + x^6/6! + x^7/7! + x^8/8! + x^9/9! + ...
//          = x * [ 1 + x/2! + x^2/3! + x^3/4! + x^4/5! + x^5/6! + x^6/7! + x^7/8! + x^8/9! + ... ]
//
//  p(x) = SUM[k=0..8] C_2k * x^(2k)        q(x) = SUM[k=0..8] C_(2k+1) * x^(2k+1)
//
//  e^x - 1 ~ x * [ p(x) + x * q(x) ]
//
*/
float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE, status);
return float128_mul(t, x, status);
}
// =================================================
// F2XM1                 Compute 2^x - 1
// =================================================
//
// Uses the following identities:
//
// 1. ----------------------------------------------------------
//    2^x = e^(x*ln(2))
//
// 2. ----------------------------------------------------------
//    e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ... + x^n/n! + ...
//
floatx80 f2xm1(floatx80 a, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit64u zSig0, zSig1, zSig2;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a))
{
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
Bit64u aSig = extractFloatx80Frac(a);
Bit32s aExp = extractFloatx80Exp(a);
int aSign = extractFloatx80Sign(a);
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1))
return propagateFloatx80NaNOne(a, status);
return (aSign) ? floatx80_negone : a;
}
if (aExp == 0) {
if (aSig == 0) return a;
float_raise(status, float_flag_denormal | float_flag_inexact);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
tiny_argument:
mul128By64To192(LN2_SIG_HI, LN2_SIG_LO, aSig, &zSig0, &zSig1, &zSig2);
if (0 < (Bit64s) zSig0) {
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
--aExp;
}
return
roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1, status);
}
float_raise(status, float_flag_inexact);
if (aExp < 0x3FFF)
{
if (aExp < FLOATX80_EXP_BIAS-68)
goto tiny_argument;
/* ******************************** */
/* using float128 for approximation */
/* ******************************** */
float128 x = floatx80_to_float128(a, status);
x = float128_mul(x, float128_ln2, status);
x = poly_exp(x, status);
return float128_to_floatx80(x, status);
}
else
{
if (a.exp == 0xBFFF && ! (aSig<<1))
return floatx80_neghalf;
return a;
}
}
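As a side note, a small host-math sanity check (plain C with libm, not the softfloat code) of the identity 2^x - 1 = e^(x*ln 2) - 1 that f2xm1 above is built on:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Compare 2^x - 1 against expm1(x * ln 2) over the F2XM1 domain [-1, 1]. */
    for (double x = -1.0; x <= 1.0; x += 0.25) {
        double direct = exp2(x) - 1.0;
        double series = expm1(x * log(2.0));
        printf("x=%+5.2f  2^x-1=%.17g  expm1(x*ln2)=%.17g\n", x, direct, series);
    }
    return 0;
}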

288 src/cpu/softfloat/fpatan.cc Normal file

@@ -0,0 +1,288 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define FLOAT128
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
#include "fpu_constant.h"
#define FPATAN_ARR_SIZE 11
static const float128 float128_one =
packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
static const float128 float128_sqrt3 =
packFloat128(BX_CONST64(0x3fffbb67ae8584ca), BX_CONST64(0xa73b25742d7078b8));
static const floatx80 floatx80_pi =
packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
static const float128 float128_pi2 =
packFloat128(BX_CONST64(0x3fff921fb54442d1), BX_CONST64(0x8469898CC5170416));
static const float128 float128_pi4 =
packFloat128(BX_CONST64(0x3ffe921fb54442d1), BX_CONST64(0x8469898CC5170416));
static const float128 float128_pi6 =
packFloat128(BX_CONST64(0x3ffe0c152382d736), BX_CONST64(0x58465BB32E0F580F));
static float128 atan_arr[FPATAN_ARR_SIZE] =
{
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /* 3 */
PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /* 7 */
PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618) /* 21 */
};
extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
/* |x| < 1/4 */
static float128 poly_atan(float128 x1, struct float_status_t *status)
{
/*
//  atan(x) ~ x - x^3/3 + x^5/5 - x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 + x^17/17
//          = x * [ 1 - x^2/3 + x^4/5 - x^6/7 + x^8/9 - x^10/11 + x^12/13 - x^14/15 + x^16/17 ]
//
//  p(x) = SUM[k=0..5] C_2k * x^(4k)        q(x) = SUM[k=0..5] C_(2k+1) * x^(4k+2)
//
//  atan(x) ~ x * [ p(x) + x^2 * q(x) ]
//
*/
return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE, status);
}
// =================================================
// FPATAN                Compute arctan(y/x)
// =================================================
//
// Uses the following identities:
//
// 1. ----------------------------------------------------------
//
//    atan(-x) = -atan(x)
//
// 2. ----------------------------------------------------------
//
//    atan(x) + atan(y) = atan((x+y)/(1-xy)),        xy < 1
//    atan(x) + atan(y) = atan((x+y)/(1-xy)) + PI,   x > 0, xy > 1
//    atan(x) + atan(y) = atan((x+y)/(1-xy)) - PI,   x < 0, xy > 1
//
// 3. ----------------------------------------------------------
//
//    atan(x) = atan(INF) + atan(-1/x)
//    atan(x) = PI/4 + atan((x-1)/(x+1))
//    atan(x) = PI/6 + atan((x*sqrt(3) - 1)/(x + sqrt(3)))
//
// 4. ----------------------------------------------------------
//
//    atan(x) = x - x^3/3 + x^5/5 - x^7/7 + x^9/9 - ... + (-1)^n * x^(2n+1)/(2n+1) + ...
//
floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
Bit64u aSig = extractFloatx80Frac(a);
Bit32s aExp = extractFloatx80Exp(a);
int aSign = extractFloatx80Sign(a);
Bit64u bSig = extractFloatx80Frac(b);
Bit32s bExp = extractFloatx80Exp(b);
int bSign = extractFloatx80Sign(b);
int zSign = aSign ^ bSign;
if (bExp == 0x7FFF)
{
if ((Bit64u) (bSig<<1))
return propagateFloatx80NaN(a, b, status);
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1))
return propagateFloatx80NaN(a, b, status);
if (aSign) { /* return 3PI/4 */
return roundAndPackFloatx80(80, bSign,
FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status);
}
else { /* return PI/4 */
return roundAndPackFloatx80(80, bSign,
FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
}
}
if (aSig && (aExp == 0))
float_raise(status, float_flag_denormal);
/* return PI/2 */
return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
}
if (aExp == 0x7FFF)
{
if ((Bit64u) (aSig<<1))
return propagateFloatx80NaN(a, b, status);
if (bSig && (bExp == 0))
float_raise(status, float_flag_denormal);
return_PI_or_ZERO:
if (aSign) { /* return PI */
return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
} else { /* return 0 */
return packFloatx80(bSign, 0, 0);
}
}
if (bExp == 0)
{
if (bSig == 0) {
if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
goto return_PI_or_ZERO;
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
}
if (aExp == 0)
{
if (aSig == 0) /* return PI/2 */
return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
float_raise(status, float_flag_inexact);
/* |a| = |b| ==> return PI/4 */
if (aSig == bSig && aExp == bExp)
return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
/* ******************************** */
/* using float128 for approximation */
/* ******************************** */
float128 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0, status);
float128 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0, status);
float128 x;
int swap = 0, add_pi6 = 0, add_pi4 = 0;
if (aExp > bExp || (aExp == bExp && aSig > bSig))
{
x = float128_div(b128, a128, status);
}
else {
x = float128_div(a128, b128, status);
swap = 1;
}
Bit32s xExp = extractFloat128Exp(x);
if (xExp <= FLOATX80_EXP_BIAS-40)
goto approximation_completed;
if (x.hi >= BX_CONST64(0x3ffe800000000000)) // 3/4 < x < 1
{
/*
arctan(x) = arctan((x-1)/(x+1)) + pi/4
*/
float128 t1 = float128_sub(x, float128_one, status);
float128 t2 = float128_add(x, float128_one, status);
x = float128_div(t1, t2, status);
add_pi4 = 1;
}
else
{
/* argument correction */
if (xExp >= 0x3FFD) // 1/4 < x < 3/4
{
/*
arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
*/
float128 t1 = float128_mul(x, float128_sqrt3, status);
float128 t2 = float128_add(x, float128_sqrt3, status);
x = float128_sub(t1, float128_one, status);
x = float128_div(x, t2, status);
add_pi6 = 1;
}
}
x = poly_atan(x, status);
if (add_pi6) x = float128_add(x, float128_pi6, status);
if (add_pi4) x = float128_add(x, float128_pi4, status);
approximation_completed:
if (swap) x = float128_sub(float128_pi2, x, status);
floatx80 result = float128_to_floatx80(x, status);
if (zSign) floatx80_chs(result);
int rSign = extractFloatx80Sign(result);
if (!bSign && rSign)
return floatx80_add(result, floatx80_pi, status);
if (bSign && !rSign)
return floatx80_sub(result, floatx80_pi, status);
return result;
}
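A standalone check (host libm, not the emulator code) of the pi/6 argument-reduction identity applied above for 1/4 < x < 3/4:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* atan(x) = pi/6 + atan((x*sqrt(3) - 1)/(x + sqrt(3))) */
    const double pi = acos(-1.0);
    const double s3 = sqrt(3.0);
    for (double x = 0.30; x < 0.75; x += 0.10) {
        double direct  = atan(x);
        double reduced = pi / 6.0 + atan((x * s3 - 1.0) / (x + s3));
        printf("x=%.2f  atan(x)=%.17g  reduced=%.17g\n", x, direct, reduced);
    }
    return 0;
}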

196 src/cpu/softfloat/fprem.cc Normal file

@@ -0,0 +1,196 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
#define USE_estimateDiv128To64
#include "softfloat-macros.h"
/* executes single exponent reduction cycle */
static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
{
Bit64u term0, term1;
Bit64u aSig1 = 0;
shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig);
mul64To128(bSig, q, &term0, &term1);
sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
while ((Bit64s)(*zSig1) < 0) {
--q;
add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
}
return q;
}
static int do_fprem(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, int rounding_mode, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit32s aExp, bExp, zExp, expDiff;
Bit64u aSig0, aSig1, bSig;
int aSign;
*q = 0;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
{
float_raise(status, float_flag_invalid);
*r = floatx80_default_nan;
return -1;
}
aSig0 = extractFloatx80Frac(a);
aExp = extractFloatx80Exp(a);
aSign = extractFloatx80Sign(a);
bSig = extractFloatx80Frac(b);
bExp = extractFloatx80Exp(b);
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
*r = propagateFloatx80NaN(a, b, status);
return -1;
}
float_raise(status, float_flag_invalid);
*r = floatx80_default_nan;
return -1;
}
if (bExp == 0x7FFF) {
if ((Bit64u) (bSig<<1)) {
*r = propagateFloatx80NaN(a, b, status);
return -1;
}
if (aExp == 0 && aSig0) {
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
*r = (a.fraction & BX_CONST64(0x8000000000000000)) ?
packFloatx80(aSign, aExp, aSig0) : a;
return 0;
}
*r = a;
return 0;
}
if (bExp == 0) {
if (bSig == 0) {
float_raise(status, float_flag_invalid);
*r = floatx80_default_nan;
return -1;
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
}
if (aExp == 0) {
if (aSig0 == 0) {
*r = a;
return 0;
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
}
expDiff = aExp - bExp;
aSig1 = 0;
Bit32u overflow = 0;
if (expDiff >= 64) {
int n = (expDiff & 0x1f) | 0x20;
remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
zExp = aExp - n;
overflow = 1;
}
else {
zExp = bExp;
if (expDiff < 0) {
if (expDiff < -1) {
*r = (a.fraction & BX_CONST64(0x8000000000000000)) ?
packFloatx80(aSign, aExp, aSig0) : a;
return 0;
}
shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
expDiff = 0;
}
if (expDiff > 0) {
*q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
}
else {
if (bSig <= aSig0) {
aSig0 -= bSig;
*q = 1;
}
}
if (rounding_mode == float_round_nearest_even)
{
Bit64u term0, term1;
shift128Right(bSig, 0, 1, &term0, &term1);
if (! lt128(aSig0, aSig1, term0, term1))
{
int lt = lt128(term0, term1, aSig0, aSig1);
int eq = eq128(aSig0, aSig1, term0, term1);
if ((eq && ((*q) & 1)) || lt) {
aSign = !aSign;
++(*q);
}
if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
}
}
}
*r = normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
return overflow;
}
/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'. The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status)
{
return do_fprem(a, b, r, q, float_round_nearest_even, status);
}
/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'. Unlike previous function
| the function does not compute the remainder specified in the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic. This function operates
| differently from the previous function in the way that it rounds the
| quotient of 'a' divided by 'b' to an integer.
*----------------------------------------------------------------------------*/
int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status)
{
return do_fprem(a, b, r, q, float_round_to_zero, status);
}
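The two entry points mirror the standard C library pair fmod()/remainder(); a host-math analogue (illustrative only, not the softfloat code):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = 5.3, b = 2.0;
    /* floatx80_remainder rounds the quotient toward zero, like fmod():
     *     5.3 - 2*2.0 = +1.3
     * floatx80_ieee754_remainder rounds it to nearest-even, like remainder():
     *     5.3 - 3*2.0 = -0.7                                              */
    printf("fmod(%.1f, %.1f)      = %+.1f\n", a, b, fmod(a, b));
    printf("remainder(%.1f, %.1f) = %+.1f\n", a, b, remainder(a, b));
    return 0;
}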


@@ -0,0 +1,82 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
#ifndef _FPU_CONSTANTS_H_
#define _FPU_CONSTANTS_H_
#include "config.h"
// Pentium CPU uses only 68-bit precision M_PI approximation
//#define BETTER_THAN_PENTIUM
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
//////////////////////////////
// PI, PI/2, PI/4 constants
//////////////////////////////
#define FLOATX80_PI_EXP (0x4000)
// 128-bit PI fraction
#ifdef BETTER_THAN_PENTIUM
#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
#define FLOAT_PI_LO (BX_CONST64(0xc4c6628b80dc1cd1))
#else
#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
#define FLOAT_PI_LO (BX_CONST64(0xC000000000000000))
#endif
#define FLOATX80_PI2_EXP (0x3FFF)
#define FLOATX80_PI4_EXP (0x3FFE)
//////////////////////////////
// 3PI/4 constant
//////////////////////////////
#define FLOATX80_3PI4_EXP (0x4000)
// 128-bit 3PI/4 fraction
#ifdef BETTER_THAN_PENTIUM
#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
#define FLOAT_3PI4_LO (BX_CONST64(0x9394c9e8a0a5159c))
#else
#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
#define FLOAT_3PI4_LO (BX_CONST64(0x9000000000000000))
#endif
//////////////////////////////
// 1/LN2 constant
//////////////////////////////
#define FLOAT_LN2INV_EXP (0x3FFF)
// 128-bit 1/LN2 fraction
#ifdef BETTER_THAN_PENTIUM
#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
#define FLOAT_LN2INV_LO (BX_CONST64(0xbe87fed0691d3e89))
#else
#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
#define FLOAT_LN2INV_LO (BX_CONST64(0xC000000000000000))
#endif
#endif
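A small illustration (plain C; constants copied from the #else branches above) of how little of the 128-bit PI fraction the default Pentium-style constant keeps beyond the high 64-bit word:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t pi_lo_full    = 0xc4c6628b80dc1cd1ULL; /* BETTER_THAN_PENTIUM low half */
    uint64_t pi_lo_default = 0xC000000000000000ULL; /* default truncated low half   */
    /* Everything below the top bits of the low word is dropped by the default. */
    printf("low-word bits dropped: %016llx\n",
           (unsigned long long)(pi_lo_full & ~pi_lo_default));
    return 0;
}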


@@ -0,0 +1,441 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define FLOAT128
#define USE_estimateDiv128To64
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
#include "fpu_constant.h"
static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
/* reduce trigonometric function argument using 128-bit precision
M_PI approximation */
static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
{
Bit64u term0, term1, term2;
Bit64u aSig1 = 0;
shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
while ((Bit64s)(*zSig1) < 0) {
--q;
add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
}
*zSig1 = term2;
return q;
}
static int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1)
{
Bit64u term0, term1, q = 0;
if (expDiff < 0) {
shift128Right(*aSig0, 0, 1, aSig0, aSig1);
expDiff = 0;
}
if (expDiff > 0) {
q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
}
else {
if (FLOAT_PI_HI <= *aSig0) {
*aSig0 -= FLOAT_PI_HI;
q = 1;
}
}
shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
if (! lt128(*aSig0, *aSig1, term0, term1))
{
int lt = lt128(term0, term1, *aSig0, *aSig1);
int eq = eq128(*aSig0, *aSig1, term0, term1);
if ((eq && (q & 1)) || lt) {
*zSign = !(*zSign);
++q;
}
if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);
}
return (int)(q & 3);
}
#define SIN_ARR_SIZE 11
#define COS_ARR_SIZE 11
static float128 sin_arr[SIN_ARR_SIZE] =
{
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */
PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */
};
static float128 cos_arr[COS_ARR_SIZE] =
{
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */
PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */
PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */
};
extern float128 OddPoly (float128 x, float128 *arr, int n, struct float_status_t *status);
/* 0 <= x <= pi/4 */
BX_CPP_INLINE float128 poly_sin(float128 x, struct float_status_t *status)
{
//  sin(x) ~ x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - x^11/11! + x^13/13! - x^15/15!
//         = x * [ 1 - x^2/3! + x^4/5! - x^6/7! + x^8/9! - x^10/11! + x^12/13! - x^14/15! ]
//
//  p(x) = SUM[k=0..3] C_2k * x^(4k)  > 0        q(x) = SUM[k=0..3] C_(2k+1) * x^(4k+2)  < 0
//
//  sin(x) ~ x * [ p(x) + x^2 * q(x) ]
//
return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
}
extern float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
/* 0 <= x <= pi/4 */
BX_CPP_INLINE float128 poly_cos(float128 x, struct float_status_t *status)
{
//  cos(x) ~ 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! - x^10/10! + x^12/12! - x^14/14!
//
//  p(x) = SUM[k=0..3] C_2k * x^(4k)  > 0        q(x) = SUM[k=0..3] C_(2k+1) * x^(4k+2)  < 0
//
//  cos(x) ~ [ p(x) + x^2 * q(x) ]
//
return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
}
BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
{
if (sin_a) *sin_a = a;
if (cos_a) *cos_a = a;
}
BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
{
if (sin_a) *sin_a = a;
if (cos_a) *cos_a = floatx80_one;
}
static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, struct float_status_t *status)
{
if (quotient & 0x1) {
r = poly_cos(r, status);
neg = 0;
} else {
r = poly_sin(r, status);
}
floatx80 result = float128_to_floatx80(r, status);
if (quotient & 0x2)
neg = ! neg;
if (neg)
floatx80_chs(result);
return result;
}
// =================================================
// FSINCOS Compute sin(x) and cos(x)
// =================================================
//
// Uses the following identities:
// ----------------------------------------------------------
//
// sin(-x) = -sin(x)
// cos(-x) = cos(x)
//
// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
// cos(x+y) = cos(x)*cos(y)-sin(x)*sin(y)
//
// sin(x+ pi/2) = cos(x)
// sin(x+ pi) = -sin(x)
// sin(x+3pi/2) = -cos(x)
// sin(x+2pi) = sin(x)
//
int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit64u aSig0, aSig1 = 0;
Bit32s aExp, zExp, expDiff;
int aSign, zSign;
int q = 0;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a)) {
goto invalid;
}
aSig0 = extractFloatx80Frac(a);
aExp = extractFloatx80Exp(a);
aSign = extractFloatx80Sign(a);
/* invalid argument */
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig0<<1)) {
sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOne(a, status));
return 0;
}
invalid:
float_raise(status, float_flag_invalid);
sincos_invalid(sin_a, cos_a, floatx80_default_nan);
return 0;
}
if (aExp == 0) {
if (aSig0 == 0) {
sincos_tiny_argument(sin_a, cos_a, a);
return 0;
}
float_raise(status, float_flag_denormal);
/* handle pseudo denormals */
if (! (aSig0 & BX_CONST64(0x8000000000000000)))
{
float_raise(status, float_flag_inexact);
if (sin_a)
float_raise(status, float_flag_underflow);
sincos_tiny_argument(sin_a, cos_a, a);
return 0;
}
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
}
zSign = aSign;
zExp = FLOATX80_EXP_BIAS;
expDiff = aExp - zExp;
/* argument is out-of-range */
if (expDiff >= 63)
return -1;
float_raise(status, float_flag_inexact);
if (expDiff < -1) { // doesn't require reduction
if (expDiff <= -68) {
a = packFloatx80(aSign, aExp, aSig0);
sincos_tiny_argument(sin_a, cos_a, a);
return 0;
}
zExp = aExp;
}
else {
q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
}
/* **************************** */
/* argument reduction completed */
/* **************************** */
/* using float128 for approximation */
float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
if (aSign) q = -q;
if (sin_a) *sin_a = sincos_approximation(zSign, r, q, status);
if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);
return 0;
}
int fsin(floatx80 *a, struct float_status_t *status)
{
return fsincos(*a, a, 0, status);
}
int fcos(floatx80 *a, struct float_status_t *status)
{
return fsincos(*a, 0, a, status);
}
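For intuition, a host-math sketch (plain C, not the softfloat code) of the quadrant logic that reduce_trig_arg() and sincos_approximation() implement:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Reduce x by multiples of pi/2; sin(r + k*pi/2) is +/-sin(r) or +/-cos(r)
     * depending on the quadrant k & 3, which is what the code above does with
     * the quotient returned by reduce_trig_arg(). */
    const double pi = acos(-1.0);
    double x = 10.0;
    double q = floor(x / (pi / 2.0));      /* reduction quotient              */
    double r = x - q * (pi / 2.0);         /* reduced argument, 0 <= r < pi/2 */
    int    k = (int) fmod(q, 4.0);         /* quadrant, 0..3                  */
    double s = (k & 1) ? cos(r) : sin(r);  /* odd quadrants swap sin and cos  */
    if (k & 2) s = -s;                     /* quadrants 2 and 3 flip the sign */
    printf("sin(%.1f) = %.17g, reconstructed = %.17g\n", x, sin(x), s);
    return 0;
}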
// =================================================
// FPTAN Compute tan(x)
// =================================================
//
// Uses the following identities:
//
// 1. ----------------------------------------------------------
//
// sin(-x) = -sin(x)
// cos(-x) = cos(x)
//
// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
// cos(x+y) = cos(x)*cos(y)-sin(x)*sin(y)
//
// sin(x+ pi/2) = cos(x)
// sin(x+ pi) = -sin(x)
// sin(x+3pi/2) = -cos(x)
// sin(x+2pi) = sin(x)
//
// 2. ----------------------------------------------------------
//
// sin(x)
// tan(x) = ------
// cos(x)
//
int ftan(floatx80 *a, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit64u aSig0, aSig1 = 0;
Bit32s aExp, zExp, expDiff;
int aSign, zSign;
int q = 0;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(*a)) {
goto invalid;
}
aSig0 = extractFloatx80Frac(*a);
aExp = extractFloatx80Exp(*a);
aSign = extractFloatx80Sign(*a);
/* invalid argument */
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig0<<1))
{
*a = propagateFloatx80NaNOne(*a, status);
return 0;
}
invalid:
float_raise(status, float_flag_invalid);
*a = floatx80_default_nan;
return 0;
}
if (aExp == 0) {
if (aSig0 == 0) return 0;
float_raise(status, float_flag_denormal);
/* handle pseudo denormals */
if (! (aSig0 & BX_CONST64(0x8000000000000000)))
{
float_raise(status, float_flag_inexact | float_flag_underflow);
return 0;
}
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
}
zSign = aSign;
zExp = FLOATX80_EXP_BIAS;
expDiff = aExp - zExp;
/* argument is out-of-range */
if (expDiff >= 63)
return -1;
float_raise(status, float_flag_inexact);
if (expDiff < -1) { // doesn't require reduction
if (expDiff <= -68) {
*a = packFloatx80(aSign, aExp, aSig0);
return 0;
}
zExp = aExp;
}
else {
q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
}
/* **************************** */
/* argument reduction completed */
/* **************************** */
/* using float128 for approximation */
float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
float128 sin_r = poly_sin(r, status);
float128 cos_r = poly_cos(r, status);
if (q & 0x1) {
r = float128_div(cos_r, sin_r, status);
zSign = ! zSign;
} else {
r = float128_div(sin_r, cos_r, status);
}
*a = float128_to_floatx80(r, status);
if (zSign)
floatx80_chs(*a);
return 0;
}
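The q & 0x1 branch in ftan() above corresponds to the cotangent flip after a pi/2 reduction; a quick host-math check (not the softfloat code):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* For x in an odd quadrant, tan(x) = -cos(r)/sin(r) with r = x - pi/2. */
    const double pi = acos(-1.0);
    double x = 2.0;                 /* lands in quadrant 1 after the reduction */
    double r = x - pi / 2.0;
    printf("tan(2.0)       = %.17g\n", tan(x));
    printf("sin/cos        = %.17g\n", sin(x) / cos(x));
    printf("-cos(r)/sin(r) = %.17g\n", -cos(r) / sin(r));
    return 0;
}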

363 src/cpu/softfloat/fyl2x.cc Normal file

@@ -0,0 +1,363 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define FLOAT128
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
#include "fpu_constant.h"
static const floatx80 floatx80_one =
packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
static const float128 float128_one =
packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
static const float128 float128_two =
packFloat128(BX_CONST64(0x4000000000000000), BX_CONST64(0x0000000000000000));
static const float128 float128_ln2inv2 =
packFloat128(BX_CONST64(0x400071547652b82f), BX_CONST64(0xe1777d0ffda0d23a));
#define SQRT2_HALF_SIG BX_CONST64(0xb504f333f9de6484)
extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
#define L2_ARR_SIZE 9
static float128 ln_arr[L2_ARR_SIZE] =
{
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */
PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */
PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */
};
static float128 poly_ln(float128 x1, struct float_status_t *status)
{
/*
//
//  1/2 * ln((1+u)/(1-u)) ~ u + u^3/3 + u^5/5 + u^7/7 + u^9/9 + u^11/11 + u^13/13 + u^15/15
//                        = u * [ 1 + u^2/3 + u^4/5 + u^6/7 + u^8/9 + u^10/11 + u^12/13 + u^14/15 ]
//
//  p(u) = SUM[k=0..3] C_2k * u^(4k)        q(u) = SUM[k=0..3] C_(2k+1) * u^(4k+2)
//
//  1/2 * ln((1+u)/(1-u)) ~ u * [ p(u) + u^2 * q(u) ]
//
*/
return OddPoly(x1, ln_arr, L2_ARR_SIZE, status);
}
/* required sqrt(2)/2 < x < sqrt(2) */
static float128 poly_l2(float128 x, struct float_status_t *status)
{
/* using float128 for approximation */
float128 x_p1 = float128_add(x, float128_one, status);
float128 x_m1 = float128_sub(x, float128_one, status);
x = float128_div(x_m1, x_p1, status);
x = poly_ln(x, status);
x = float128_mul(x, float128_ln2inv2, status);
return x;
}
static float128 poly_l2p1(float128 x, struct float_status_t *status)
{
/* using float128 for approximation */
float128 x_p2 = float128_add(x, float128_two, status);
x = float128_div(x, x_p2, status);
x = poly_ln(x, status);
x = float128_mul(x, float128_ln2inv2, status);
return x;
}
// =================================================
// FYL2X                 Compute y * log2(x)
// =================================================
//
// Uses the following identities:
//
// 1. ----------------------------------------------------------
//    log2(x) = ln(x)/ln(2),      ln(x*y) = ln(x) + ln(y)
//
// 2. ----------------------------------------------------------
//    ln(x) = ln((1+u)/(1-u)), when u = (x-1)/(x+1)
//
// 3. ----------------------------------------------------------
//    ln((1+u)/(1-u)) = 2 * [ u + u^3/3 + u^5/5 + u^7/7 + ... + u^(2n+1)/(2n+1) + ... ]
//
floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
invalid:
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
Bit64u aSig = extractFloatx80Frac(a);
Bit32s aExp = extractFloatx80Exp(a);
int aSign = extractFloatx80Sign(a);
Bit64u bSig = extractFloatx80Frac(b);
Bit32s bExp = extractFloatx80Exp(b);
int bSign = extractFloatx80Sign(b);
int zSign = bSign ^ 1;
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1)
|| ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
{
return propagateFloatx80NaN(a, b, status);
}
if (aSign) goto invalid;
else {
if (bExp == 0) {
if (bSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
}
if (bExp == 0x7FFF)
{
if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
if (aSign && (Bit64u)(aExp | aSig)) goto invalid;
if (aSig && (aExp == 0))
float_raise(status, float_flag_denormal);
if (aExp < 0x3FFF) {
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;
return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0) {
if (aSig == 0) {
if ((bExp | bSig) == 0) goto invalid;
float_raise(status, float_flag_divbyzero);
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aSign) goto invalid;
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
if (aSign) goto invalid;
if (bExp == 0) {
if (bSig == 0) {
if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
return packFloatx80(bSign, 0, 0);
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
}
if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0))
return packFloatx80(bSign, 0, 0);
float_raise(status, float_flag_inexact);
int ExpDiff = aExp - 0x3FFF;
aExp = 0;
if (aSig >= SQRT2_HALF_SIG) {
ExpDiff++;
aExp--;
}
/* ******************************** */
/* using float128 for approximation */
/* ******************************** */
Bit64u zSig0, zSig1;
shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
float128 x = packFloat128Four(0, aExp+0x3FFF, zSig0, zSig1);
x = poly_l2(x, status);
x = float128_add(x, int64_to_float128((Bit64s) ExpDiff), status);
return floatx80_128_mul(b, x, status);
}
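The reduction above boils down to the atanh form of the logarithm; a host-math check (plain C, not the softfloat code) of the identity behind poly_l2() and the float128_ln2inv2 = 2/ln(2) scale factor:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* log2(x) = (2/ln 2) * atanh((x-1)/(x+1)) */
    for (double x = 0.75; x <= 1.40; x += 0.15) {
        double u      = (x - 1.0) / (x + 1.0);
        double approx = (2.0 / log(2.0)) * atanh(u);
        printf("x=%.2f  log2(x)=%.17g  via u=%.17g\n", x, log2(x), approx);
    }
    return 0;
}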
// =================================================
// FYL2XP1               Compute y * log2(x + 1)
// =================================================
//
// Uses the following identities:
//
// 1. ----------------------------------------------------------
//    log2(x) = ln(x)/ln(2)
//
// 2. ----------------------------------------------------------
//    ln(x+1) = ln((1+u)/(1-u)), when u = x/(x+2)
//
// 3. ----------------------------------------------------------
//    ln((1+u)/(1-u)) = 2 * [ u + u^3/3 + u^5/5 + u^7/7 + ... + u^(2n+1)/(2n+1) + ... ]
//
floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit32s aExp, bExp;
Bit64u aSig, bSig, zSig0, zSig1, zSig2;
int aSign, bSign;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
invalid:
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
aSig = extractFloatx80Frac(a);
aExp = extractFloatx80Exp(a);
aSign = extractFloatx80Sign(a);
bSig = extractFloatx80Frac(b);
bExp = extractFloatx80Exp(b);
bSign = extractFloatx80Sign(b);
int zSign = aSign ^ bSign;
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1)
|| ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
{
return propagateFloatx80NaN(a, b, status);
}
if (aSign) goto invalid;
else {
if (bExp == 0) {
if (bSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
}
if (bExp == 0x7FFF)
{
if ((Bit64u) (bSig<<1))
return propagateFloatx80NaN(a, b, status);
if (aExp == 0) {
if (aSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0) {
if (aSig == 0) {
if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
return packFloatx80(zSign, 0, 0);
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
if (bSig == 0) return packFloatx80(zSign, 0, 0);
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
}
float_raise(status, float_flag_inexact);
if (aSign && aExp >= 0x3FFF)
return a;
if (aExp >= 0x3FFC) // big argument
{
return fyl2x(floatx80_add(a, floatx80_one, status), b, status);
}
// handle tiny argument
if (aExp < FLOATX80_EXP_BIAS-70)
{
// first order approximation, return (a*b)/ln(2)
Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
if (0 < (Bit64s) zSig0) {
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
--zExp;
}
zExp = zExp + bExp - 0x3FFE;
mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
if (0 < (Bit64s) zSig0) {
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
--zExp;
}
return
roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1, status);
}
/* ******************************** */
/* using float128 for approximation */
/* ******************************** */
shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
float128 x = packFloat128Four(aSign, aExp, zSig0, zSig1);
x = poly_l2p1(x, status);
return floatx80_128_mul(b, x, status);
}
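Likewise for FYL2XP1, feeding u = x/(x+2) into the same series avoids forming 1+x explicitly for tiny x; a host-math check (plain C, not the softfloat code):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* log2(1+x) = (2/ln 2) * atanh(x/(x+2)) */
    for (double x = -0.2; x <= 0.21; x += 0.1) {
        double u      = x / (x + 2.0);
        double approx = (2.0 / log(2.0)) * atanh(u);
        printf("x=%+.2f  log2(1+x)=%.17g  via u=%.17g\n", x, log1p(x) / log(2.0), approx);
    }
    return 0;
}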

89 src/cpu/softfloat/poly.cc Normal file

@@ -0,0 +1,89 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define FLOAT128
#include <assert.h>
#include "softfloat.h"
//  f(x) ~ C_0 + C_1*x + C_2*x^2 + C_3*x^3 + C_4*x^4 + ... + C_n*x^n
//
//  p(x) = SUM C_2k * x^(2k)        q(x) = SUM C_(2k+1) * x^(2k+1)
//
//  f(x) ~ [ p(x) + x * q(x) ]
//
float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
{
float128 r = arr[--n];
do {
r = float128_mul(r, x, status);
r = float128_add(r, arr[--n], status);
} while (n > 0);
return r;
}
//  f(x) ~ C_0 + C_1*x^2 + C_2*x^4 + C_3*x^6 + C_4*x^8 + ... + C_n*x^(2n)
//
//  p(x) = SUM C_2k * x^(4k)        q(x) = SUM C_(2k+1) * x^(4k+2)
//
//  f(x) ~ [ p(x) + x^2 * q(x) ]
//
float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
{
return EvalPoly(float128_mul(x, x, status), arr, n, status);
}
//  f(x) ~ C_0*x + C_1*x^3 + C_2*x^5 + C_3*x^7 + C_4*x^9 + ... + C_n*x^(2n+1)
//       = x * [ C_0 + C_1*x^2 + C_2*x^4 + C_3*x^6 + C_4*x^8 + ... + C_n*x^(2n) ]
//
//  p(x) = SUM C_2k * x^(4k)        q(x) = SUM C_(2k+1) * x^(4k+2)
//
//  f(x) ~ x * [ p(x) + x^2 * q(x) ]
//
float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status)
{
return float128_mul(x, EvenPoly(x, arr, n, status), status);
}
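EvalPoly() is a plain Horner evaluation and EvenPoly()/OddPoly() reuse it on x^2; a plain-double analogue (illustrative only; eval_poly below is a hypothetical stand-in, not the float128 code):

#include <math.h>
#include <stdio.h>

/* Same Horner scheme as EvalPoly() above, on ordinary doubles. */
static double eval_poly(double x, const double *arr, int n)
{
    double r = arr[--n];
    do {
        r = r * x + arr[--n];
    } while (n > 0);
    return r;
}

int main(void)
{
    /* Odd-polynomial evaluation of sin(x): x * p(x^2) with the first four
     * Taylor coefficients 1, -1/3!, 1/5!, -1/7!  (mirrors OddPoly/EvenPoly). */
    const double coeff[] = { 1.0, -1.0 / 6.0, 1.0 / 120.0, -1.0 / 5040.0 };
    double x = 0.5;
    double approx = x * eval_poly(x * x, coeff, 4);
    printf("sin(0.5) = %.17g   poly = %.17g\n", sin(x), approx);
    return 0;
}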


@@ -0,0 +1,496 @@
/*============================================================================
This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#ifndef _SOFTFLOAT_COMPARE_H_
#define _SOFTFLOAT_COMPARE_H_
#include "softfloat.h"
// ======= float32 ======= //
typedef int (*float32_compare_method)(float32, float32, struct float_status_t *status);
// 0x00
BX_CPP_INLINE int float32_eq_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_equal);
}
// 0x01
BX_CPP_INLINE int float32_lt_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_less);
}
// 0x02
BX_CPP_INLINE int float32_le_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_equal);
}
// 0x03
BX_CPP_INLINE int float32_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_unordered);
}
// 0x04
BX_CPP_INLINE int float32_neq_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_equal);
}
// 0x05
BX_CPP_INLINE int float32_nlt_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_less);
}
// 0x06
BX_CPP_INLINE int float32_nle_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_less) && (relation != float_relation_equal);
}
// 0x07
BX_CPP_INLINE int float32_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_unordered);
}
// 0x08
BX_CPP_INLINE int float32_eq_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_equal) || (relation == float_relation_unordered);
}
// 0x09
BX_CPP_INLINE int float32_nge_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_unordered);
}
// 0x0a
BX_CPP_INLINE int float32_ngt_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_greater);
}
// 0x0b
BX_CPP_INLINE int float32_false_quiet(float32 a, float32 b, struct float_status_t *status)
{
float32_compare_quiet(a, b, status);
return 0;
}
// 0x0c
BX_CPP_INLINE int float32_neq_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_equal) && (relation != float_relation_unordered);
}
// 0x0d
BX_CPP_INLINE int float32_ge_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_greater) || (relation == float_relation_equal);
}
// 0x0e
BX_CPP_INLINE int float32_gt_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_greater);
}
// 0x0f
BX_CPP_INLINE int float32_true_quiet(float32 a, float32 b, struct float_status_t *status)
{
float32_compare_quiet(a, b, status);
return 1;
}
// 0x10
BX_CPP_INLINE int float32_eq_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_equal);
}
// 0x11
BX_CPP_INLINE int float32_lt_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_less);
}
// 0x12
BX_CPP_INLINE int float32_le_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_equal);
}
// 0x13
BX_CPP_INLINE int float32_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_unordered);
}
// 0x14
BX_CPP_INLINE int float32_neq_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_equal);
}
// 0x15
BX_CPP_INLINE int float32_nlt_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_less);
}
// 0x16
BX_CPP_INLINE int float32_nle_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_less) && (relation != float_relation_equal);
}
// 0x17
BX_CPP_INLINE int float32_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_unordered);
}
// 0x18
BX_CPP_INLINE int float32_eq_unordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation == float_relation_equal) || (relation == float_relation_unordered);
}
// 0x19
BX_CPP_INLINE int float32_nge_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_unordered);
}
// 0x1a
BX_CPP_INLINE int float32_ngt_unordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation != float_relation_greater);
}
// 0x1b
BX_CPP_INLINE int float32_false_signalling(float32 a, float32 b, struct float_status_t *status)
{
float32_compare_two(a, b, status);
return 0;
}
// 0x1c
BX_CPP_INLINE int float32_neq_ordered_signalling(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_two(a, b, status);
return (relation != float_relation_equal) && (relation != float_relation_unordered);
}
// 0x1d
BX_CPP_INLINE int float32_ge_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_greater) || (relation == float_relation_equal);
}
// 0x1e
BX_CPP_INLINE int float32_gt_ordered_quiet(float32 a, float32 b, struct float_status_t *status)
{
int relation = float32_compare_quiet(a, b, status);
return (relation == float_relation_greater);
}
// 0x1f
BX_CPP_INLINE int float32_true_signalling(float32 a, float32 b, struct float_status_t *status)
{
float32_compare_two(a, b, status);
return 1;
}
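/*----------------------------------------------------------------------------
| Illustrative sketch, not part of the original SoftFloat sources: the
| 0x00..0x1f numbering above suggests collecting these predicates into a
| table indexed by an SSE/AVX-style comparison immediate, roughly like
| (`compare32', `imm' and `mask' are hypothetical names):
|
|   static const float32_compare_method compare32[4] = {
|       float32_eq_ordered_quiet,       // imm 0x00
|       float32_lt_ordered_signalling,  // imm 0x01
|       float32_le_ordered_signalling,  // imm 0x02
|       float32_unordered_quiet,        // imm 0x03
|   };
|   Bit32u mask = compare32[imm & 3](a, b, status) ? 0xFFFFFFFF : 0;
|
| How the emulator actually builds and consumes such a table is an assumption
| here.
*----------------------------------------------------------------------------*/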
// ======= float64 ======= //
typedef int (*float64_compare_method)(float64, float64, struct float_status_t *status);
// 0x00
BX_CPP_INLINE int float64_eq_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_equal);
}
// 0x01
BX_CPP_INLINE int float64_lt_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_less);
}
// 0x02
BX_CPP_INLINE int float64_le_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_equal);
}
// 0x03
BX_CPP_INLINE int float64_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_unordered);
}
// 0x04
BX_CPP_INLINE int float64_neq_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_equal);
}
// 0x05
BX_CPP_INLINE int float64_nlt_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_less);
}
// 0x06
BX_CPP_INLINE int float64_nle_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_less) && (relation != float_relation_equal);
}
// 0x07
BX_CPP_INLINE int float64_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_unordered);
}
// 0x08
BX_CPP_INLINE int float64_eq_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_equal) || (relation == float_relation_unordered);
}
// 0x09
BX_CPP_INLINE int float64_nge_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_unordered);
}
// 0x0a
BX_CPP_INLINE int float64_ngt_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_greater);
}
// 0x0b
BX_CPP_INLINE int float64_false_quiet(float64 a, float64 b, struct float_status_t *status)
{
float64_compare_quiet(a, b, status);
return 0;
}
// 0x0c
BX_CPP_INLINE int float64_neq_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_equal) && (relation != float_relation_unordered);
}
// 0x0d
BX_CPP_INLINE int float64_ge_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_greater) || (relation == float_relation_equal);
}
// 0x0e
BX_CPP_INLINE int float64_gt_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_greater);
}
// 0x0f
BX_CPP_INLINE int float64_true_quiet(float64 a, float64 b, struct float_status_t *status)
{
float64_compare_quiet(a, b, status);
return 1;
}
// 0x10
BX_CPP_INLINE int float64_eq_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_equal);
}
// 0x11
BX_CPP_INLINE int float64_lt_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_less);
}
// 0x12
BX_CPP_INLINE int float64_le_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_equal);
}
// 0x13
BX_CPP_INLINE int float64_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_unordered);
}
// 0x14
BX_CPP_INLINE int float64_neq_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_equal);
}
// 0x15
BX_CPP_INLINE int float64_nlt_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_less);
}
// 0x16
BX_CPP_INLINE int float64_nle_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_less) && (relation != float_relation_equal);
}
// 0x17
BX_CPP_INLINE int float64_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_unordered);
}
// 0x18
BX_CPP_INLINE int float64_eq_unordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation == float_relation_equal) || (relation == float_relation_unordered);
}
// 0x19
BX_CPP_INLINE int float64_nge_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_less) || (relation == float_relation_unordered);
}
// 0x1a
BX_CPP_INLINE int float64_ngt_unordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation != float_relation_greater);
}
// 0x1b
BX_CPP_INLINE int float64_false_signalling(float64 a, float64 b, struct float_status_t *status)
{
float64_compare_two(a, b, status);
return 0;
}
// 0x1c
BX_CPP_INLINE int float64_neq_ordered_signalling(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_two(a, b, status);
return (relation != float_relation_equal) && (relation != float_relation_unordered);
}
// 0x1d
BX_CPP_INLINE int float64_ge_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_greater) || (relation == float_relation_equal);
}
// 0x1e
BX_CPP_INLINE int float64_gt_ordered_quiet(float64 a, float64 b, struct float_status_t *status)
{
int relation = float64_compare_quiet(a, b, status);
return (relation == float_relation_greater);
}
// 0x1f
BX_CPP_INLINE int float64_true_signalling(float64 a, float64 b, struct float_status_t *status)
{
float64_compare_two(a, b, status);
return 1;
}
#endif

View File

@@ -0,0 +1,686 @@
/*============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#ifndef _SOFTFLOAT_MACROS_H_
#define _SOFTFLOAT_MACROS_H_
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 16, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit16u shift16RightJamming(Bit16u a, int count)
{
Bit16u z;
if (count == 0) {
z = a;
}
else if (count < 16) {
z = (a>>count) | ((a<<((-count) & 15)) != 0);
}
else {
z = (a != 0);
}
return z;
}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
{
Bit32u z;
if (count == 0) {
z = a;
}
else if (count < 32) {
z = (a>>count) | ((a<<((-count) & 31)) != 0);
}
else {
z = (a != 0);
}
return z;
}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 64, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
{
Bit64u z;
if (count == 0) {
z = a;
}
else if (count < 64) {
z = (a>>count) | ((a << ((-count) & 63)) != 0);
}
else {
z = (a != 0);
}
return z;
}
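/*----------------------------------------------------------------------------
| Worked example (added for illustration): shifting 0x14 right by 3 discards
| the nonzero bits 100, so shift64RightJamming(0x14, 3) returns 3 (the plain
| shift result 2 with the sticky bit jammed into bit 0), while
| shift64RightJamming(0x10, 3) returns 2 because no nonzero bits are lost.
| The sticky bit is what later lets the rounding code distinguish an exact
| result from one that was truncated.
*----------------------------------------------------------------------------*/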
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
| _plus_ the number of bits given in `count'. The shifted result is at most
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
| bits shifted off form a second 64-bit result as follows: The _last_ bit
| shifted off is the most-significant bit of the extra result, and the other
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
| bits shifted off were all zero. This extra result is stored in the location
| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
| (This routine makes more sense if `a0' and `a1' are considered to form
| a fixed-point value with binary point between `a0' and `a1'. This fixed-
| point value is shifted right by the number of bits given in `count', and
| the integer part of the result is returned at the location pointed to by
| `z0Ptr'. The fractional part of the result may be slightly corrupted as
| described above, and is returned at the location pointed to by `z1Ptr'.)
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void shift64ExtraRightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
if (count == 0) {
z1 = a1;
z0 = a0;
}
else if (count < 64) {
z1 = (a0<<negCount) | (a1 != 0);
z0 = a0>>count;
}
else {
if (count == 64) {
z1 = a0 | (a1 != 0);
}
else {
z1 = ((a0 | a1) != 0);
}
z0 = 0;
}
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
| any carry out is lost. The result is broken into two 64-bit pieces which
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z1 = a1 + b1;
*z1Ptr = z1;
*z0Ptr = a0 + b0 + (z1 < a1);
}
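/*----------------------------------------------------------------------------
| Worked example (added for illustration): add128(0, 0xFFFFFFFFFFFFFFFF, 0, 1,
| &z0, &z1) leaves z1 == 0 and z0 == 1; the low-word overflow is detected by
| the (z1 < a1) comparison and carried into the high word.
*----------------------------------------------------------------------------*/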
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
| 2^128, so any borrow out (carry out) is lost. The result is broken into two
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
| `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
*z1Ptr = a1 - b1;
*z0Ptr = a0 - b0 - (a1 < b1);
}
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit32u aHigh, aLow, bHigh, bLow;
Bit64u z0, zMiddleA, zMiddleB, z1;
aLow = (Bit32u) a;
aHigh = (Bit32u)(a>>32);
bLow = (Bit32u) b;
bHigh = (Bit32u)(b>>32);
z1 = ((Bit64u) aLow) * bLow;
zMiddleA = ((Bit64u) aLow) * bHigh;
zMiddleB = ((Bit64u) aHigh) * bLow;
z0 = ((Bit64u) aHigh) * bHigh;
zMiddleA += zMiddleB;
z0 += (((Bit64u) (zMiddleA < zMiddleB))<<32) + (zMiddleA>>32);
zMiddleA <<= 32;
z1 += zMiddleA;
z0 += (z1 < zMiddleA);
*z1Ptr = z1;
*z0Ptr = z0;
}
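/*----------------------------------------------------------------------------
| Worked example (added for illustration): mul64To128(0xFFFFFFFFFFFFFFFF, 2,
| &z0, &z1) produces z0 == 1 and z1 == 0xFFFFFFFFFFFFFFFE, i.e. the 128-bit
| value 2^65 - 2, with z0 holding the high 64 bits and z1 the low 64 bits.
*----------------------------------------------------------------------------*/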
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
| divisor `b' must be at least 2^63. If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/
#ifdef USE_estimateDiv128To64
static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
{
Bit64u b0, b1;
Bit64u rem0, rem1, term0, term1;
Bit64u z;
if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
b0 = b>>32;
z = (b0<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / b0)<<32;
mul64To128(b, z, &term0, &term1);
sub128(a0, a1, term0, term1, &rem0, &rem1);
while (((Bit64s) rem0) < 0) {
z -= BX_CONST64(0x100000000);
b1 = b<<32;
add128(rem0, rem1, b0, b1, &rem0, &rem1);
}
rem0 = (rem0<<32) | (rem1>>32);
z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
return z;
}
#endif
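/*----------------------------------------------------------------------------
| Worked example (added for illustration): with a normalized divisor
| b = 0x8000000000000000 (2^63) and the dividend a0:a1 = 1:0 (i.e. 2^64),
| estimateDiv128To64 returns 2, which here happens to be the exact quotient;
| in general the return value lies between q and q + 2 as stated above.
*----------------------------------------------------------------------------*/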
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/
#ifdef USE_estimateSqrt32
static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
{
static const Bit16u sqrtOddAdjustments[] = {
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
};
static const Bit16u sqrtEvenAdjustments[] = {
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
};
Bit32u z;
int index = (a>>27) & 15;
if (aExp & 1) {
z = 0x4000 + (a>>17) - sqrtOddAdjustments[index];
z = ((a / z)<<14) + (z<<15);
a >>= 1;
}
else {
z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index];
z = a / z + z;
z = (0x20000 <= z) ? 0xFFFF8000 : (z<<15);
if (z <= a) return (Bit32u) (((Bit32s) a)>>1);
}
return ((Bit32u) ((((Bit64u) a)<<31) / z)) + (z>>1);
}
#endif
static const int countLeadingZeros8[] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'. If `a' is zero, 16 is returned.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int countLeadingZeros16(Bit16u a)
{
int shiftCount = 0;
if (a < 0x100) {
shiftCount += 8;
a <<= 8;
}
shiftCount += countLeadingZeros8[a>>8];
return shiftCount;
}
#endif
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'. If `a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int countLeadingZeros32(Bit32u a)
{
int shiftCount = 0;
if (a < 0x10000) {
shiftCount += 16;
a <<= 16;
}
if (a < 0x1000000) {
shiftCount += 8;
a <<= 8;
}
shiftCount += countLeadingZeros8[a>>24];
return shiftCount;
}
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'. If `a' is zero, 64 is returned.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
{
int shiftCount = 0;
if (a < BX_CONST64(0x100000000)) {
shiftCount += 32;
}
else {
a >>= 32;
}
shiftCount += countLeadingZeros32((Bit32u)(a));
return shiftCount;
}
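/*----------------------------------------------------------------------------
| Note (added for illustration): on compilers that provide it, this is
| equivalent to GCC/Clang's __builtin_clzll() guarded against a zero input,
| roughly `a ? __builtin_clzll(a) : 64' on typical 64-bit targets. The
| table-driven version is presumably kept because it is portable to any
| C/C++ compiler.
*----------------------------------------------------------------------------*/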
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'. Any bits shifted off are lost. The value
| of `count' can be arbitrarily large; in particular, if `count' is greater
| than 128, the result will be 0. The result is broken into two 64-bit pieces
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
if (count == 0) {
z1 = a1;
z0 = a0;
}
else if (count < 64) {
z1 = (a0<<negCount) | (a1>>count);
z0 = a0>>count;
}
else {
z1 = (count < 128) ? (a0>>(count & 63)) : 0;
z0 = 0;
}
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'. If any nonzero bits are shifted off, they
| are ``jammed'' into the least significant bit of the result by setting the
| least significant bit to 1. The value of `count' can be arbitrarily large;
| in particular, if `count' is greater than 128, the result will be either
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
| nonzero. The result is broken into two 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void shift128RightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
if (count == 0) {
z1 = a1;
z0 = a0;
}
else if (count < 64) {
z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0);
z0 = a0>>count;
}
else {
if (count == 64) {
z1 = a0 | (a1 != 0);
}
else if (count < 128) {
z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0);
}
else {
z1 = ((a0 | a1) != 0);
}
z0 = 0;
}
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'. Any bits shifted off are lost. The value
| of `count' must be less than 64. The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void shortShift128Left(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
*z1Ptr = a1<<count;
*z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
}
/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
| modulo 2^192, so any carry out is lost. The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void add192(
Bit64u a0,
Bit64u a1,
Bit64u a2,
Bit64u b0,
Bit64u b1,
Bit64u b2,
Bit64u *z0Ptr,
Bit64u *z1Ptr,
Bit64u *z2Ptr
)
{
Bit64u z0, z1, z2;
unsigned carry0, carry1;
z2 = a2 + b2;
carry1 = (z2 < a2);
z1 = a1 + b1;
carry0 = (z1 < a1);
z0 = a0 + b0;
z1 += carry1;
z0 += (z1 < carry1);
z0 += carry0;
*z2Ptr = z2;
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void sub192(
Bit64u a0,
Bit64u a1,
Bit64u a2,
Bit64u b0,
Bit64u b1,
Bit64u b2,
Bit64u *z0Ptr,
Bit64u *z1Ptr,
Bit64u *z2Ptr
)
{
Bit64u z0, z1, z2;
unsigned borrow0, borrow1;
z2 = a2 - b2;
borrow1 = (a2 < b2);
z1 = a1 - b1;
borrow0 = (a1 < b1);
z0 = a0 - b0;
z0 -= (z1 < borrow1);
z1 -= borrow1;
z0 -= borrow0;
*z2Ptr = z2;
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
{
return (a0 == b0) && (a1 == b1);
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
{
return (a0 < b0) || ((a0 == b0) && (a1 <= b1));
}
/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
| returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
{
return (a0 < b0) || ((a0 == b0) && (a1 < b1));
}
#endif /* FLOATX80 */
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product. The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void mul128By64To192(
Bit64u a0,
Bit64u a1,
Bit64u b,
Bit64u *z0Ptr,
Bit64u *z1Ptr,
Bit64u *z2Ptr
)
{
Bit64u z0, z1, z2, more1;
mul64To128(a1, b, &z1, &z2);
mul64To128(a0, b, &z0, &more1);
add128(z0, more1, 0, z1, &z0, &z1);
*z2Ptr = z2;
*z1Ptr = z1;
*z0Ptr = z0;
}
#ifdef FLOAT128
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
| product. The product is broken into four 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void mul128To256(
Bit64u a0,
Bit64u a1,
Bit64u b0,
Bit64u b1,
Bit64u *z0Ptr,
Bit64u *z1Ptr,
Bit64u *z2Ptr,
Bit64u *z3Ptr
)
{
Bit64u z0, z1, z2, z3;
Bit64u more1, more2;
mul64To128(a1, b1, &z2, &z3);
mul64To128(a1, b0, &z1, &more2);
add128(z1, more2, 0, z2, &z1, &z2);
mul64To128(a0, b0, &z0, &more1);
add128(z0, more1, 0, z1, &z0, &z1);
mul64To128(a0, b1, &more1, &more2);
add128(more1, more2, 0, z2, &more1, &z2);
add128(z0, z1, 0, more1, &z0, &z1);
*z3Ptr = z3;
*z2Ptr = z2;
*z1Ptr = z1;
*z0Ptr = z0;
}
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'. The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
| off form a third 64-bit result as follows: The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero. This extra result is stored in the location pointed to by
| `z2Ptr'. The value of `count' can be arbitrarily large.
| (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'. This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void shift128ExtraRightJamming(
Bit64u a0,
Bit64u a1,
Bit64u a2,
int count,
Bit64u *z0Ptr,
Bit64u *z1Ptr,
Bit64u *z2Ptr
)
{
Bit64u z0, z1, z2;
int negCount = (-count) & 63;
if (count == 0) {
z2 = a2;
z1 = a1;
z0 = a0;
}
else {
if (count < 64) {
z2 = a1<<negCount;
z1 = (a0<<negCount) | (a1>>count);
z0 = a0>>count;
}
else {
if (count == 64) {
z2 = a1;
z1 = a0;
}
else {
a2 |= a1;
if (count < 128) {
z2 = a0<<negCount;
z1 = a0>>(count & 63);
}
else {
z2 = (count == 128) ? a0 : (a0 != 0);
z1 = 0;
}
}
z0 = 0;
}
z2 |= (a2 != 0);
}
*z2Ptr = z2;
*z1Ptr = z1;
*z0Ptr = z0;
}
#endif /* FLOAT128 */
#endif

View File

@@ -0,0 +1,558 @@
/*============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* This code is based on QEMU patch by Peter Maydell
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloat.h"
#include "softfloat-round-pack.h"
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target
| if desired).
*----------------------------------------------------------------------------*/
#include "softfloat-macros.h"
/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.h"
/*----------------------------------------------------------------------------
| Takes three single-precision floating-point values `a', `b' and `c', one of
| which is a NaN, and returns the appropriate NaN result. If any of `a',
| `b' or `c' is a signaling NaN, the invalid exception is raised.
| (The QEMU helper this is based on also received an `infzero' flag telling it
| whether a*b was 0*inf or inf*0; this port passes no such flag, and NaN
| operands are simply resolved by the first-operand rule below.)
*----------------------------------------------------------------------------*/
static float32 propagateFloat32MulAddNaN(float32 a, float32 b, float32 c, struct float_status_t *status)
{
int aIsNaN = float32_is_nan(a);
int bIsNaN = float32_is_nan(b);
int aIsSignalingNaN = float32_is_signaling_nan(a);
int bIsSignalingNaN = float32_is_signaling_nan(b);
int cIsSignalingNaN = float32_is_signaling_nan(c);
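    /* 0x00400000 is bit 22, the most-significant fraction bit of a single-
       precision value; OR-ing it in converts any signaling NaN operand into
       the corresponding quiet NaN before it is propagated. */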
a |= 0x00400000;
b |= 0x00400000;
c |= 0x00400000;
if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN)
float_raise(status, float_flag_invalid);
// operate according to float_first_operand_nan mode
if (aIsSignalingNaN | aIsNaN) {
return a;
}
else {
return (bIsSignalingNaN | bIsNaN) ? b : c;
}
}
/*----------------------------------------------------------------------------
| Takes three double-precision floating-point values `a', `b' and `c', one of
| which is a NaN, and returns the appropriate NaN result. If any of `a',
| `b' or `c' is a signaling NaN, the invalid exception is raised.
| (The QEMU helper this is based on also received an `infzero' flag telling it
| whether a*b was 0*inf or inf*0; this port passes no such flag, and NaN
| operands are simply resolved by the first-operand rule below.)
*----------------------------------------------------------------------------*/
static float64 propagateFloat64MulAddNaN(float64 a, float64 b, float64 c, struct float_status_t *status)
{
int aIsNaN = float64_is_nan(a);
int bIsNaN = float64_is_nan(b);
int aIsSignalingNaN = float64_is_signaling_nan(a);
int bIsSignalingNaN = float64_is_signaling_nan(b);
int cIsSignalingNaN = float64_is_signaling_nan(c);
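    /* BX_CONST64(0x0008000000000000) is bit 51, the most-significant fraction
       bit of a double-precision value; OR-ing it in quietens any signaling
       NaN operand before it is propagated. */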
a |= BX_CONST64(0x0008000000000000);
b |= BX_CONST64(0x0008000000000000);
c |= BX_CONST64(0x0008000000000000);
if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN)
float_raise(status, float_flag_invalid);
// operate according to float_first_operand_nan mode
if (aIsSignalingNaN | aIsNaN) {
return a;
}
else {
return (bIsSignalingNaN | bIsNaN) ? b : c;
}
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the single-precision floating-point values
| `a' and `b' then adding 'c', with no intermediate rounding step after the
| multiplication. The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic 754-2008.
| The flags argument allows the caller to select negation of the
| addend, the intermediate product, or the final result. (The difference
| between this and having the caller do a separate negation is that negating
| externally will flip the sign bit on NaNs.)
*----------------------------------------------------------------------------*/
float32 float32_muladd(float32 a, float32 b, float32 c, int flags, struct float_status_t *status)
{
int aSign, bSign, cSign, zSign;
Bit16s aExp, bExp, cExp, pExp, zExp;
Bit32u aSig, bSig, cSig;
int pInf, pZero, pSign;
Bit64u pSig64, cSig64, zSig64;
Bit32u pSig;
int shiftcount;
aSig = extractFloat32Frac(a);
aExp = extractFloat32Exp(a);
aSign = extractFloat32Sign(a);
bSig = extractFloat32Frac(b);
bExp = extractFloat32Exp(b);
bSign = extractFloat32Sign(b);
cSig = extractFloat32Frac(c);
cExp = extractFloat32Exp(c);
cSign = extractFloat32Sign(c);
/* It is implementation-defined whether the cases of (0,inf,qnan)
 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 * they return if they do). The QEMU original handed this information
 * off to a target-specific pick-a-NaN routine; in this port a NaN
 * among the inputs is resolved first, and the pure 0*inf / inf*0
 * case is handled by the infzero check below.
 */
if (((aExp == 0xff) && aSig) ||
((bExp == 0xff) && bSig) ||
((cExp == 0xff) && cSig)) {
return propagateFloat32MulAddNaN(a, b, c, status);
}
if (get_denormals_are_zeros(status)) {
if (aExp == 0) aSig = 0;
if (bExp == 0) bSig = 0;
if (cExp == 0) cSig = 0;
}
int infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
(aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
if (infzero) {
float_raise(status, float_flag_invalid);
return float32_default_nan;
}
if (flags & float_muladd_negate_c) {
cSign ^= 1;
}
/* Work out the sign and type of the product */
pSign = aSign ^ bSign;
if (flags & float_muladd_negate_product) {
pSign ^= 1;
}
pInf = (aExp == 0xff) || (bExp == 0xff);
pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
if (cExp == 0xff) {
if (pInf && (pSign ^ cSign)) {
/* addition of opposite-signed infinities => InvalidOperation */
float_raise(status, float_flag_invalid);
return float32_default_nan;
}
/* Otherwise generate an infinity of the same sign */
if ((aSig && aExp == 0) || (bSig && bExp == 0)) {
float_raise(status, float_flag_denormal);
}
return packFloat32(cSign, 0xff, 0);
}
if (pInf) {
if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
float_raise(status, float_flag_denormal);
}
return packFloat32(pSign, 0xff, 0);
}
if (pZero) {
if (cExp == 0) {
if (cSig == 0) {
/* Adding two exact zeroes */
if (pSign == cSign) {
zSign = pSign;
} else if (get_float_rounding_mode(status) == float_round_down) {
zSign = 1;
} else {
zSign = 0;
}
return packFloat32(zSign, 0, 0);
}
/* Exact zero plus a denormal */
float_raise(status, float_flag_denormal);
if (get_flush_underflow_to_zero(status)) {
float_raise(status, float_flag_underflow | float_flag_inexact);
return packFloat32(cSign, 0, 0);
}
}
/* Zero plus something non-zero */
return packFloat32(cSign, cExp, cSig);
}
if (aExp == 0) {
float_raise(status, float_flag_denormal);
normalizeFloat32Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
float_raise(status, float_flag_denormal);
normalizeFloat32Subnormal(bSig, &bExp, &bSig);
}
/* Calculate the actual result a * b + c */
/* Multiply first; this is easy. */
/* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
* because we want the true exponent, not the "one-less-than"
* flavour that roundAndPackFloat32() takes.
*/
pExp = aExp + bExp - 0x7e;
aSig = (aSig | 0x00800000) << 7;
bSig = (bSig | 0x00800000) << 8;
pSig64 = (Bit64u)aSig * bSig;
if ((Bit64s)(pSig64 << 1) >= 0) {
pSig64 <<= 1;
pExp--;
}
zSign = pSign;
/* Now pSig64 is the significand of the multiply, with the explicit bit in
* position 62.
*/
if (cExp == 0) {
if (!cSig) {
/* Throw out the special case of c being an exact zero now */
pSig = (Bit32u) shift64RightJamming(pSig64, 32);
return roundAndPackFloat32(zSign, pExp - 1, pSig, status);
}
float_raise(status, float_flag_denormal);
normalizeFloat32Subnormal(cSig, &cExp, &cSig);
}
cSig64 = (Bit64u)cSig << 39;
cSig64 |= BX_CONST64(0x4000000000000000);
int expDiff = pExp - cExp;
if (pSign == cSign) {
/* Addition */
if (expDiff > 0) {
/* scale c to match p */
cSig64 = shift64RightJamming(cSig64, expDiff);
zExp = pExp;
} else if (expDiff < 0) {
/* scale p to match c */
pSig64 = shift64RightJamming(pSig64, -expDiff);
zExp = cExp;
} else {
/* no scaling needed */
zExp = cExp;
}
/* Add significands and make sure explicit bit ends up in posn 62 */
zSig64 = pSig64 + cSig64;
if ((Bit64s)zSig64 < 0) {
zSig64 = shift64RightJamming(zSig64, 1);
} else {
zExp--;
}
zSig64 = shift64RightJamming(zSig64, 32);
return roundAndPackFloat32(zSign, zExp, zSig64, status);
} else {
/* Subtraction */
if (expDiff > 0) {
cSig64 = shift64RightJamming(cSig64, expDiff);
zSig64 = pSig64 - cSig64;
zExp = pExp;
} else if (expDiff < 0) {
pSig64 = shift64RightJamming(pSig64, -expDiff);
zSig64 = cSig64 - pSig64;
zExp = cExp;
zSign ^= 1;
} else {
zExp = pExp;
if (cSig64 < pSig64) {
zSig64 = pSig64 - cSig64;
} else if (pSig64 < cSig64) {
zSig64 = cSig64 - pSig64;
zSign ^= 1;
} else {
/* Exact zero */
return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0);
}
}
--zExp;
/* Do the equivalent of normalizeRoundAndPackFloat32() but
* starting with the significand in a Bit64u.
*/
shiftcount = countLeadingZeros64(zSig64) - 1;
zSig64 <<= shiftcount;
zExp -= shiftcount;
zSig64 = shift64RightJamming(zSig64, 32);
return roundAndPackFloat32(zSign, zExp, zSig64, status);
}
}
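/*----------------------------------------------------------------------------
| Usage sketch (added for illustration): an FMA-style instruction handler
| would typically map its operation onto the `flags' argument, e.g.
|
|   r = float32_muladd(a, b, c, 0, &status);                            //  a*b + c
|   r = float32_muladd(a, b, c, float_muladd_negate_c, &status);        //  a*b - c
|   r = float32_muladd(a, b, c, float_muladd_negate_product, &status);  // -(a*b) + c
|
| Only the two flag values referenced in the code above are shown; how the
| emulator's FMA front end actually drives this helper is an assumption here.
*----------------------------------------------------------------------------*/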
/*----------------------------------------------------------------------------
| Returns the result of multiplying the double-precision floating-point values
| `a' and `b' then adding 'c', with no intermediate rounding step after the
| multiplication. The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic 754-2008.
| The flags argument allows the caller to select negation of the
| addend, the intermediate product, or the final result. (The difference
| between this and having the caller do a separate negation is that negating
| externally will flip the sign bit on NaNs.)
*----------------------------------------------------------------------------*/
float64 float64_muladd(float64 a, float64 b, float64 c, int flags, struct float_status_t *status)
{
int aSign, bSign, cSign, zSign;
Bit16s aExp, bExp, cExp, pExp, zExp;
Bit64u aSig, bSig, cSig;
int pInf, pZero, pSign;
Bit64u pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
int shiftcount;
aSig = extractFloat64Frac(a);
aExp = extractFloat64Exp(a);
aSign = extractFloat64Sign(a);
bSig = extractFloat64Frac(b);
bExp = extractFloat64Exp(b);
bSign = extractFloat64Sign(b);
cSig = extractFloat64Frac(c);
cExp = extractFloat64Exp(c);
cSign = extractFloat64Sign(c);
/* It is implementation-defined whether the cases of (0,inf,qnan)
 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 * they return if they do). The QEMU original handed this information
 * off to a target-specific pick-a-NaN routine; in this port a NaN
 * among the inputs is resolved first, and the pure 0*inf / inf*0
 * case is handled by the infzero check below.
 */
if (((aExp == 0x7ff) && aSig) ||
((bExp == 0x7ff) && bSig) ||
((cExp == 0x7ff) && cSig)) {
return propagateFloat64MulAddNaN(a, b, c, status);
}
if (get_denormals_are_zeros(status)) {
if (aExp == 0) aSig = 0;
if (bExp == 0) bSig = 0;
if (cExp == 0) cSig = 0;
}
int infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
(aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
if (infzero) {
float_raise(status, float_flag_invalid);
return float64_default_nan;
}
if (flags & float_muladd_negate_c) {
cSign ^= 1;
}
/* Work out the sign and type of the product */
pSign = aSign ^ bSign;
if (flags & float_muladd_negate_product) {
pSign ^= 1;
}
pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
if (cExp == 0x7ff) {
if (pInf && (pSign ^ cSign)) {
/* addition of opposite-signed infinities => InvalidOperation */
float_raise(status, float_flag_invalid);
return float64_default_nan;
}
/* Otherwise generate an infinity of the same sign */
if ((aSig && aExp == 0) || (bSig && bExp == 0)) {
float_raise(status, float_flag_denormal);
}
return packFloat64(cSign, 0x7ff, 0);
}
if (pInf) {
if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
float_raise(status, float_flag_denormal);
}
return packFloat64(pSign, 0x7ff, 0);
}
if (pZero) {
if (cExp == 0) {
if (cSig == 0) {
/* Adding two exact zeroes */
if (pSign == cSign) {
zSign = pSign;
} else if (get_float_rounding_mode(status) == float_round_down) {
zSign = 1;
} else {
zSign = 0;
}
return packFloat64(zSign, 0, 0);
}
/* Exact zero plus a denormal */
float_raise(status, float_flag_denormal);
if (get_flush_underflow_to_zero(status)) {
float_raise(status, float_flag_underflow | float_flag_inexact);
return packFloat64(cSign, 0, 0);
}
}
/* Zero plus something non-zero */
return packFloat64(cSign, cExp, cSig);
}
if (aExp == 0) {
float_raise(status, float_flag_denormal);
normalizeFloat64Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
float_raise(status, float_flag_denormal);
normalizeFloat64Subnormal(bSig, &bExp, &bSig);
}
/* Calculate the actual result a * b + c */
/* Multiply first; this is easy. */
/* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
* because we want the true exponent, not the "one-less-than"
* flavour that roundAndPackFloat64() takes.
*/
pExp = aExp + bExp - 0x3fe;
aSig = (aSig | BX_CONST64(0x0010000000000000))<<10;
bSig = (bSig | BX_CONST64(0x0010000000000000))<<11;
mul64To128(aSig, bSig, &pSig0, &pSig1);
if ((Bit64s)(pSig0 << 1) >= 0) {
shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
pExp--;
}
zSign = pSign;
/* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
* bit in position 126.
*/
if (cExp == 0) {
if (!cSig) {
/* Throw out the special case of c being an exact zero now */
shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
return roundAndPackFloat64(zSign, pExp - 1, pSig1, status);
}
float_raise(status, float_flag_denormal);
normalizeFloat64Subnormal(cSig, &cExp, &cSig);
}
cSig0 = cSig << 10;
cSig1 = 0;
cSig0 |= BX_CONST64(0x4000000000000000);
int expDiff = pExp - cExp;
if (pSign == cSign) {
/* Addition */
if (expDiff > 0) {
/* scale c to match p */
shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
zExp = pExp;
} else if (expDiff < 0) {
/* scale p to match c */
shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
zExp = cExp;
} else {
/* no scaling needed */
zExp = cExp;
}
/* Add significands and make sure explicit bit ends up in posn 126 */
add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
if ((Bit64s)zSig0 < 0) {
shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
} else {
zExp--;
}
shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
return roundAndPackFloat64(zSign, zExp, zSig1, status);
} else {
/* Subtraction */
if (expDiff > 0) {
shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
zExp = pExp;
} else if (expDiff < 0) {
shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
zExp = cExp;
zSign ^= 1;
} else {
zExp = pExp;
if (lt128(cSig0, cSig1, pSig0, pSig1)) {
sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
} else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
zSign ^= 1;
} else {
/* Exact zero */
return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0);
}
}
--zExp;
/* Do the equivalent of normalizeRoundAndPackFloat64() but
* starting with the significand in a pair of Bit64u.
*/
if (zSig0) {
shiftcount = countLeadingZeros64(zSig0) - 1;
shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
if (zSig1) {
zSig0 |= 1;
}
zExp -= shiftcount;
} else {
shiftcount = countLeadingZeros64(zSig1) - 1;
zSig0 = zSig1 << shiftcount;
zExp -= (shiftcount + 64);
}
return roundAndPackFloat64(zSign, zExp, zSig0, status);
}
}

View File

@@ -0,0 +1,896 @@
/*============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
#define FLOAT128
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloat.h"
#include "softfloat-round-pack.h"
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target
| if desired).
*----------------------------------------------------------------------------*/
#include "softfloat-macros.h"
/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.h"
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| input. If `zSign' is 1, the input is negated before being converted to an
| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
| is simply rounded to an integer, with the inexact exception raised if the
| input cannot be represented exactly as an integer. However, if the fixed-
| point input is too large, the invalid exception is raised and the integer
| indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit32s roundAndPackInt32(int zSign, Bit64u exactAbsZ, struct float_status_t *status)
{
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
int roundIncrement = 0x40;
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) roundIncrement = 0;
else {
roundIncrement = 0x7F;
if (zSign) {
if (roundingMode == float_round_up) roundIncrement = 0;
}
else {
if (roundingMode == float_round_down) roundIncrement = 0;
}
}
}
int roundBits = (int)(exactAbsZ & 0x7F);
Bit64u absZ = (exactAbsZ + roundIncrement)>>7;
absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
Bit32s z = (Bit32s) absZ;
if (zSign) z = -z;
if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
float_raise(status, float_flag_invalid);
return (Bit32s)(int32_indefinite);
}
if (roundBits) {
float_raise(status, float_flag_inexact);
if ((absZ << 7) > exactAbsZ)
set_float_rounding_up(status);
}
return z;
}
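/*----------------------------------------------------------------------------
| Worked example (added for illustration): under round-to-nearest-even the
| increment is 0x40, i.e. half of one integer step of the 7-bit fixed point.
| An input of (4<<7)|0x40 (the value 4.5) therefore rounds to 4, while
| (5<<7)|0x40 (the value 5.5) rounds to 6: the tie is broken by clearing the
| low bit of the rounded result, which picks the even neighbour.
*----------------------------------------------------------------------------*/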
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer. However, if the fixed-point input is too large, the invalid
| exception is raised and the integer indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status)
{
Bit64s z;
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
int increment = ((Bit64s) absZ1 < 0);
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) increment = 0;
else {
if (zSign) {
increment = (roundingMode == float_round_down) && absZ1;
}
else {
increment = (roundingMode == float_round_up) && absZ1;
}
}
}
Bit64u exactAbsZ0 = absZ0;
if (increment) {
++absZ0;
if (absZ0 == 0) goto overflow;
absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
}
z = absZ0;
if (zSign) z = -z;
if (z && ((z < 0) ^ zSign)) {
overflow:
float_raise(status, float_flag_invalid);
return (Bit64s)(int64_indefinite);
}
if (absZ1) {
float_raise(status, float_flag_inexact);
if (absZ0 > exactAbsZ0)
set_float_rounding_up(status);
}
return z;
}
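/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): here the
| high word `absZ0' carries the integer part and the low word `absZ1' the
| fraction. Assuming `status' selects float_round_nearest_even:
|
|   roundAndPackInt64(0, 3, BX_CONST64(0x8000000000000000), &status); // 3.5 -> 4
|   roundAndPackInt64(0, 2, BX_CONST64(0x8000000000000000), &status); // 2.5 -> 2
|
| A non-zero `absZ1' raises float_flag_inexact in both cases.
*----------------------------------------------------------------------------*/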
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit unsigned integer corresponding to the
| input. Ordinarily, the fixed-point input is simply rounded to an integer,
| with the inexact exception raised if the input cannot be represented exactly
| as an integer. However, if the fixed-point input is too large, the invalid
| exception is raised and the largest unsigned integer is returned.
*----------------------------------------------------------------------------*/
Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status)
{
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
int increment = ((Bit64s) absZ1 < 0);
if (!roundNearestEven) {
if (roundingMode == float_round_to_zero) {
increment = 0;
} else if (absZ1) {
if (zSign) {
increment = (roundingMode == float_round_down) && absZ1;
} else {
increment = (roundingMode == float_round_up) && absZ1;
}
}
}
if (increment) {
++absZ0;
if (absZ0 == 0) {
float_raise(status, float_flag_invalid);
return uint64_indefinite;
}
absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
}
if (zSign && absZ0) {
float_raise(status, float_flag_invalid);
return uint64_indefinite;
}
if (absZ1) {
float_raise(status, float_flag_inexact);
}
return absZ0;
}
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Normalizes the subnormal half-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
{
int shiftCount = countLeadingZeros16(aSig) - 5;
*zSigPtr = aSig<<shiftCount;
*zExpPtr = 1 - shiftCount;
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper half-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the half-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal half-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 14
| and 13, which is 4 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status)
{
Bit16s roundIncrement, roundBits, roundMask;
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
roundIncrement = 8;
roundMask = 0xF;
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) roundIncrement = 0;
else {
roundIncrement = roundMask;
if (zSign) {
if (roundingMode == float_round_up) roundIncrement = 0;
}
else {
if (roundingMode == float_round_down) roundIncrement = 0;
}
}
}
roundBits = zSig & roundMask;
if (0x1D <= (Bit16u) zExp) {
if ((0x1D < zExp)
|| ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
{
float_raise(status, float_flag_overflow);
if (roundBits || float_exception_masked(status, float_flag_overflow)) {
float_raise(status, float_flag_inexact);
}
return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
}
if (zExp < 0) {
int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
zSig = shift16RightJamming(zSig, -zExp);
zExp = 0;
roundBits = zSig & roundMask;
if (isTiny) {
if(get_flush_underflow_to_zero(status)) {
float_raise(status, float_flag_underflow | float_flag_inexact);
return packFloat16(zSign, 0, 0);
}
// signal the #P according to roundBits calculated AFTER denormalization
if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
float_raise(status, float_flag_underflow);
}
}
}
}
if (roundBits) float_raise(status, float_flag_inexact);
Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
zSigRound &= ~(((roundBits ^ 0x10) == 0) & roundNearestEven);
if (zSigRound == 0) zExp = 0;
return packFloat16(zSign, zExp, zSigRound);
}
#endif
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
{
int shiftCount = countLeadingZeros32(aSig) - 8;
*zSigPtr = aSig<<shiftCount;
*zExpPtr = 1 - shiftCount;
}
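/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): for the
| smallest single-precision subnormal, aSig = 0x00000001, the routine above
| yields zSig = 0x00800000 (the hidden bit restored) and zExp = -22, which
| represents the same value, 2^-149, in normalized form (1.0 * 2^(-22-127)).
*----------------------------------------------------------------------------*/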
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the single-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal single-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 30
| and 29, which is 7 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
{
Bit32s roundIncrement, roundBits;
const Bit32s roundMask = 0x7F;
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
roundIncrement = 0x40;
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) roundIncrement = 0;
else {
roundIncrement = roundMask;
if (zSign) {
if (roundingMode == float_round_up) roundIncrement = 0;
}
else {
if (roundingMode == float_round_down) roundIncrement = 0;
}
}
}
roundBits = zSig & roundMask;
if (0xFD <= (Bit16u) zExp) {
if ((0xFD < zExp)
|| ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
{
float_raise(status, float_flag_overflow);
if (roundBits || float_exception_masked(status, float_flag_overflow)) {
float_raise(status, float_flag_inexact);
if (roundIncrement != 0) set_float_rounding_up(status);
}
return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
}
if (zExp < 0) {
int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
if (isTiny) {
if (!float_exception_masked(status, float_flag_underflow)) {
float_raise(status, float_flag_underflow);
zExp += 192; // bias unmasked underflow
}
}
if (zExp < 0) {
zSig = shift32RightJamming(zSig, -zExp);
zExp = 0;
roundBits = zSig & roundMask;
if (isTiny) {
// masked underflow
if(get_flush_underflow_to_zero(status)) {
float_raise(status, float_flag_underflow | float_flag_inexact);
return packFloat32(zSign, 0, 0);
}
if (roundBits) float_raise(status, float_flag_underflow);
}
}
}
}
Bit32u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
if (zSigRound == 0) zExp = 0;
if (roundBits) {
float_raise(status, float_flag_inexact);
if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
}
return packFloat32(zSign, zExp, zSigRound);
}
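/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): with
| `status' set for round-to-nearest-even, packing 1.0 looks like
|
|   roundAndPackFloat32(0, 126, 0x40000000, &status);   // returns 0x3F800000
|
| The round bits are zero, so no flags are raised; the hidden bit left in
| zSigRound (bit 23) is added into the exponent by packFloat32, which is why
| zExp is passed as 126 rather than the final biased exponent 127.
*----------------------------------------------------------------------------*/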
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input. This routine is just like
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
{
int shiftCount = countLeadingZeros32(zSig) - 1;
return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
}
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
{
int shiftCount = countLeadingZeros64(aSig) - 11;
*zSigPtr = aSig<<shiftCount;
*zExpPtr = 1 - shiftCount;
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the double-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded
| to a subnormal number, and the underflow and inexact exceptions are raised
| if the abstract input cannot be represented exactly as a subnormal double-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 62
| and 61, which is 10 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
{
Bit16s roundIncrement, roundBits;
const Bit16s roundMask = 0x3FF;
int roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
roundIncrement = 0x200;
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) roundIncrement = 0;
else {
roundIncrement = roundMask;
if (zSign) {
if (roundingMode == float_round_up) roundIncrement = 0;
}
else {
if (roundingMode == float_round_down) roundIncrement = 0;
}
}
}
roundBits = (Bit16s)(zSig & roundMask);
if (0x7FD <= (Bit16u) zExp) {
if ((0x7FD < zExp)
|| ((zExp == 0x7FD)
&& ((Bit64s) (zSig + roundIncrement) < 0)))
{
float_raise(status, float_flag_overflow);
if (roundBits || float_exception_masked(status, float_flag_overflow)) {
float_raise(status, float_flag_inexact);
if (roundIncrement != 0) set_float_rounding_up(status);
}
return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
}
if (zExp < 0) {
int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
if (isTiny) {
if (!float_exception_masked(status, float_flag_underflow)) {
float_raise(status, float_flag_underflow);
zExp += 1536; // bias unmasked underflow
}
}
if (zExp < 0) {
zSig = shift64RightJamming(zSig, -zExp);
zExp = 0;
roundBits = (Bit16s)(zSig & roundMask);
if (isTiny) {
// masked underflow
if(get_flush_underflow_to_zero(status)) {
float_raise(status, float_flag_underflow | float_flag_inexact);
return packFloat64(zSign, 0, 0);
}
if (roundBits) float_raise(status, float_flag_underflow);
}
}
}
}
Bit64u zSigRound = (zSig + roundIncrement)>>10;
zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
if (zSigRound == 0) zExp = 0;
if (roundBits) {
float_raise(status, float_flag_inexact);
if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
}
return packFloat64(zSign, zExp, zSigRound);
}
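/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): the
| `- (roundIncrement == 0)' trick above selects between infinity and the
| largest finite value on a masked overflow. With zExp = 0x7FF, zSign = 0 and
| the overflow exception masked:
|
|   round-to-nearest:  roundIncrement = 0x200 -> returns 0x7FF0000000000000 (+inf)
|   round-toward-zero: roundIncrement = 0     -> returns 0x7FEFFFFFFFFFFFFF (max finite)
|
| Overflow and inexact are raised in both cases.
*----------------------------------------------------------------------------*/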
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input. This routine is just like
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
{
int shiftCount = countLeadingZeros64(zSig) - 1;
return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
}
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'. The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
{
int shiftCount = countLeadingZeros64(aSig);
*zSigPtr = aSig<<shiftCount;
*zExpPtr = 1 - shiftCount;
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
| If `roundingPrecision' is 32 or 64, the result is rounded to the same
| number of bits as single or double precision, respectively. Otherwise, the
| result is rounded to the full precision of the extended double-precision
| format.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
{
Bit64u roundIncrement, roundMask, roundBits;
int increment;
Bit64u zSigExact; /* support rounding-up response */
Bit8u roundingMode = get_float_rounding_mode(status);
int roundNearestEven = (roundingMode == float_round_nearest_even);
if (roundingPrecision == 64) {
roundIncrement = BX_CONST64(0x0000000000000400);
roundMask = BX_CONST64(0x00000000000007FF);
}
else if (roundingPrecision == 32) {
roundIncrement = BX_CONST64(0x0000008000000000);
roundMask = BX_CONST64(0x000000FFFFFFFFFF);
}
else goto precision80;
zSig0 |= (zSig1 != 0);
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) roundIncrement = 0;
else {
roundIncrement = roundMask;
if (zSign) {
if (roundingMode == float_round_up) roundIncrement = 0;
}
else {
if (roundingMode == float_round_down) roundIncrement = 0;
}
}
}
roundBits = zSig0 & roundMask;
if (0x7FFD <= (Bit32u) (zExp - 1)) {
if ((0x7FFE < zExp)
|| ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
{
goto overflow;
}
if (zExp <= 0) {
int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
zSig0 = shift64RightJamming(zSig0, 1 - zExp);
zSigExact = zSig0;
zExp = 0;
roundBits = zSig0 & roundMask;
if (isTiny) {
if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
float_raise(status, float_flag_underflow);
}
zSig0 += roundIncrement;
if ((Bit64s) zSig0 < 0) zExp = 1;
roundIncrement = roundMask + 1;
if (roundNearestEven && (roundBits<<1 == roundIncrement))
roundMask |= roundIncrement;
zSig0 &= ~roundMask;
if (roundBits) {
float_raise(status, float_flag_inexact);
if (zSig0 > zSigExact) set_float_rounding_up(status);
}
return packFloatx80(zSign, zExp, zSig0);
}
}
if (roundBits) float_raise(status, float_flag_inexact);
zSigExact = zSig0;
zSig0 += roundIncrement;
if (zSig0 < roundIncrement) {
// Basically scale by shifting right and keep overflow
++zExp;
zSig0 = BX_CONST64(0x8000000000000000);
zSigExact >>= 1; // must scale also, or else later tests will fail
}
roundIncrement = roundMask + 1;
if (roundNearestEven && (roundBits<<1 == roundIncrement))
roundMask |= roundIncrement;
zSig0 &= ~roundMask;
if (zSig0 > zSigExact) set_float_rounding_up(status);
if (zSig0 == 0) zExp = 0;
return packFloatx80(zSign, zExp, zSig0);
precision80:
increment = ((Bit64s) zSig1 < 0);
if (! roundNearestEven) {
if (roundingMode == float_round_to_zero) increment = 0;
else {
if (zSign) {
increment = (roundingMode == float_round_down) && zSig1;
}
else {
increment = (roundingMode == float_round_up) && zSig1;
}
}
}
if (0x7FFD <= (Bit32u) (zExp - 1)) {
if ((0x7FFE < zExp)
|| ((zExp == 0x7FFE)
&& (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
&& increment))
{
roundMask = 0;
overflow:
float_raise(status, float_flag_overflow | float_flag_inexact);
if ((roundingMode == float_round_to_zero)
|| (zSign && (roundingMode == float_round_up))
|| (! zSign && (roundingMode == float_round_down)))
{
return packFloatx80(zSign, 0x7FFE, ~roundMask);
}
set_float_rounding_up(status);
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (zExp <= 0) {
int isTiny = (zExp < 0) || (! increment)
|| (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
zExp = 0;
if (isTiny) {
if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
float_raise(status, float_flag_underflow);
}
if (zSig1) float_raise(status, float_flag_inexact);
if (roundNearestEven) increment = ((Bit64s) zSig1 < 0);
else {
if (zSign) {
increment = (roundingMode == float_round_down) && zSig1;
} else {
increment = (roundingMode == float_round_up) && zSig1;
}
}
if (increment) {
zSigExact = zSig0++;
zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
if (zSig0 > zSigExact) set_float_rounding_up(status);
if ((Bit64s) zSig0 < 0) zExp = 1;
}
return packFloatx80(zSign, zExp, zSig0);
}
}
if (zSig1) float_raise(status, float_flag_inexact);
if (increment) {
zSigExact = zSig0++;
if (zSig0 == 0) {
zExp++;
zSig0 = BX_CONST64(0x8000000000000000);
zSigExact >>= 1; // must scale also, or else later tests will fail
}
else {
zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
}
if (zSig0 > zSigExact) set_float_rounding_up(status);
}
else {
if (zSig0 == 0) zExp = 0;
}
return packFloatx80(zSign, zExp, zSig0);
}
floatx80 roundAndPackFloatx80(int roundingPrecision,
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
{
struct float_status_t *round_status = status;
floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
    // bias unmasked underflow
if (status->float_exception_flags & ~status->float_exception_masks & float_flag_underflow) {
float_raise(round_status, float_flag_underflow);
return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status = round_status);
}
// bias unmasked overflow
if (status->float_exception_flags & ~status->float_exception_masks & float_flag_overflow) {
float_raise(round_status, float_flag_overflow);
return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status = round_status);
}
return result;
}
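/*----------------------------------------------------------------------------
| Editor's note (not part of the original Bochs sources): the +/- 0x6000
| exponent adjustment above appears to follow the x87 convention for unmasked
| underflow/overflow, where the delivered result is scaled by 2^24576
| (respectively 2^-24576) so that a trap handler receives a finite,
| representable extended-precision value instead of zero or infinity.
*----------------------------------------------------------------------------*/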
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
{
if (zSig0 == 0) {
zSig0 = zSig1;
zSig1 = 0;
zExp -= 64;
}
int shiftCount = countLeadingZeros64(zSig0);
shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
zExp -= shiftCount;
return
roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
}
#endif
#ifdef FLOAT128
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value
| represented by the denormalized significand formed by the concatenation of
| `aSig0' and `aSig1'. The normalized exponent is stored at the location
| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
| significand are stored at the location pointed to by `zSig0Ptr', and the
| least significant 64 bits of the normalized significand are stored at the
| location pointed to by `zSig1Ptr'.
*----------------------------------------------------------------------------*/
void normalizeFloat128Subnormal(
Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
{
int shiftCount;
if (aSig0 == 0) {
shiftCount = countLeadingZeros64(aSig1) - 15;
if (shiftCount < 0) {
*zSig0Ptr = aSig1 >>(-shiftCount);
*zSig1Ptr = aSig1 << (shiftCount & 63);
}
else {
*zSig0Ptr = aSig1 << shiftCount;
*zSig1Ptr = 0;
}
*zExpPtr = - shiftCount - 63;
}
else {
shiftCount = countLeadingZeros64(aSig0) - 15;
shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
*zExpPtr = 1 - shiftCount;
}
}
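/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): for the
| smallest quadruple-precision subnormal, aSig0 = 0 and aSig1 = 1, the code
| above takes the first branch with shiftCount = 48 and produces
| zSig0 = 0x0001000000000000, zSig1 = 0, zExp = -111, i.e. the same value
| (2^-16494) with the integer bit restored at bit 48 of the high word.
*----------------------------------------------------------------------------*/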
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent. The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float128 roundAndPackFloat128(
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status)
{
int increment = ((Bit64s) zSig2 < 0);
if (0x7FFD <= (Bit32u) zExp) {
if ((0x7FFD < zExp)
|| ((zExp == 0x7FFD)
&& eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
&& increment))
{
float_raise(status, float_flag_overflow | float_flag_inexact);
return packFloat128Four(zSign, 0x7FFF, 0, 0);
}
if (zExp < 0) {
int isTiny = (zExp < -1)
|| ! increment
|| lt128(zSig0, zSig1,
BX_CONST64(0x0001FFFFFFFFFFFF),
BX_CONST64(0xFFFFFFFFFFFFFFFF));
shift128ExtraRightJamming(
zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
zExp = 0;
if (isTiny && zSig2) float_raise(status, float_flag_underflow);
increment = ((Bit64s) zSig2 < 0);
}
}
if (zSig2) float_raise(status, float_flag_inexact);
if (increment) {
add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
zSig1 &= ~((zSig2 + zSig2 == 0) & 1);
}
else {
if ((zSig0 | zSig1) == 0) zExp = 0;
}
return packFloat128Four(zSign, zExp, zSig0, zSig1);
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand formed by the concatenation of `zSig0' and `zSig1', and
| returns the proper quadruple-precision floating-point value corresponding
| to the abstract input. This routine is just like `roundAndPackFloat128'
| except that the input significand has fewer bits and does not have to be
| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
| point exponent.
*----------------------------------------------------------------------------*/
float128 normalizeRoundAndPackFloat128(
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
{
Bit64u zSig2;
if (zSig0 == 0) {
zSig0 = zSig1;
zSig1 = 0;
zExp -= 64;
}
int shiftCount = countLeadingZeros64(zSig0) - 15;
if (0 <= shiftCount) {
zSig2 = 0;
shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
}
else {
shift128ExtraRightJamming(
zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2);
}
zExp -= shiftCount;
return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
}
#endif

View File

@@ -0,0 +1,309 @@
/*============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#ifndef _SOFTFLOAT_ROUND_PACK_H_
#define _SOFTFLOAT_ROUND_PACK_H_
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| input. If `zSign' is 1, the input is negated before being converted to an
| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
| is simply rounded to an integer, with the inexact exception raised if the
| input cannot be represented exactly as an integer. However, if the fixed-
| point input is too large, the invalid exception is raised and the integer
| indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit32s roundAndPackInt32(int zSign, Bit64u absZ, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer. However, if the fixed-point input is too large, the invalid
| exception is raised and the integer indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit unsigned integer corresponding to the
| input. Ordinarily, the fixed-point input is simply rounded to an integer,
| with the inexact exception raised if the input cannot be represented exactly
| as an integer. However, if the fixed-point input is too large, the invalid
| exception is raised and the largest unsigned integer is returned.
*----------------------------------------------------------------------------*/
Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status);
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Normalizes the subnormal half-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper half-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the half-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal half-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 14
| and 13, which is 4 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status);
#endif
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the single-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal single-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 30
| and 29, which is 7 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input. This routine is just like
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the double-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded
| to a subnormal number, and the underflow and inexact exceptions are raised
| if the abstract input cannot be represented exactly as a subnormal double-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 62
| and 61, which is 10 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input. This routine is just like
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status);
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'. The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
| If `roundingPrecision' is 32 or 64, the result is rounded to the same
| number of bits as single or double precision, respectively. Otherwise, the
| result is rounded to the full precision of the extended double-precision
| format.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 roundAndPackFloatx80(int roundingPrecision,
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
#endif // FLOATX80
#ifdef FLOAT128
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value
| represented by the denormalized significand formed by the concatenation of
| `aSig0' and `aSig1'. The normalized exponent is stored at the location
| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
| significand are stored at the location pointed to by `zSig0Ptr', and the
| least significant 64 bits of the normalized significand are stored at the
| location pointed to by `zSig1Ptr'.
*----------------------------------------------------------------------------*/
void normalizeFloat128Subnormal(
Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent. The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float128 roundAndPackFloat128(
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand formed by the concatenation of `zSig0' and `zSig1', and
| returns the proper quadruple-precision floating-point value corresponding
| to the abstract input. This routine is just like `roundAndPackFloat128'
| except that the input significand has fewer bits and does not have to be
| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
| point exponent.
*----------------------------------------------------------------------------*/
float128 normalizeRoundAndPackFloat128(
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status);
#endif // FLOAT128
#endif

View File

@@ -0,0 +1,187 @@
/*============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
#define FLOAT128
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloat.h"
#include "softfloat-specialize.h"
/*----------------------------------------------------------------------------
| Takes two single-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status)
{
int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
aIsNaN = float32_is_nan(a);
aIsSignalingNaN = float32_is_signaling_nan(a);
bIsNaN = float32_is_nan(b);
bIsSignalingNaN = float32_is_signaling_nan(b);
a |= 0x00400000;
b |= 0x00400000;
if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
if (aIsSignalingNaN) {
if (bIsSignalingNaN) goto returnLargerSignificand;
return bIsNaN ? b : a;
}
else if (aIsNaN) {
if (bIsSignalingNaN | ! bIsNaN) return a;
returnLargerSignificand:
if ((Bit32u) (a<<1) < (Bit32u) (b<<1)) return b;
if ((Bit32u) (b<<1) < (Bit32u) (a<<1)) return a;
return (a < b) ? a : b;
}
else {
return b;
}
} else {
return (aIsSignalingNaN | aIsNaN) ? a : b;
}
}
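/*----------------------------------------------------------------------------
| Editor's illustration (not part of the original Bochs sources): with
| a = 0x7F800001 (signaling NaN) and b = 0x7FC00000 (quiet NaN), the invalid
| exception is raised and both operands are quieted (a becomes 0x7FC00001).
| Under float_larger_significand_nan the quiet NaN b (0x7FC00000) is
| returned; under the other handling mode the quieted first operand,
| 0x7FC00001, is returned instead.
*----------------------------------------------------------------------------*/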
/*----------------------------------------------------------------------------
| Takes two double-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status)
{
int aIsNaN = float64_is_nan(a);
int aIsSignalingNaN = float64_is_signaling_nan(a);
int bIsNaN = float64_is_nan(b);
int bIsSignalingNaN = float64_is_signaling_nan(b);
a |= BX_CONST64(0x0008000000000000);
b |= BX_CONST64(0x0008000000000000);
if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
if (aIsSignalingNaN) {
if (bIsSignalingNaN) goto returnLargerSignificand;
return bIsNaN ? b : a;
}
else if (aIsNaN) {
if (bIsSignalingNaN | ! bIsNaN) return a;
returnLargerSignificand:
if ((Bit64u) (a<<1) < (Bit64u) (b<<1)) return b;
if ((Bit64u) (b<<1) < (Bit64u) (a<<1)) return a;
return (a < b) ? a : b;
}
else {
return b;
}
} else {
return (aIsSignalingNaN | aIsNaN) ? a : b;
}
}
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Takes two extended double-precision floating-point values `a' and `b', one
| of which is a NaN, and returns the appropriate NaN result. If either `a' or
| `b' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status)
{
int aIsNaN = floatx80_is_nan(a);
int aIsSignalingNaN = floatx80_is_signaling_nan(a);
int bIsNaN = floatx80_is_nan(b);
int bIsSignalingNaN = floatx80_is_signaling_nan(b);
a.fraction |= BX_CONST64(0xC000000000000000);
b.fraction |= BX_CONST64(0xC000000000000000);
if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
if (aIsSignalingNaN) {
if (bIsSignalingNaN) goto returnLargerSignificand;
return bIsNaN ? b : a;
}
else if (aIsNaN) {
if (bIsSignalingNaN | ! bIsNaN) return a;
returnLargerSignificand:
if (a.fraction < b.fraction) return b;
if (b.fraction < a.fraction) return a;
return (a.exp < b.exp) ? a : b;
}
else {
return b;
}
}
#endif /* FLOATX80 */
#ifdef FLOAT128
/*----------------------------------------------------------------------------
| Takes two quadruple-precision floating-point values `a' and `b', one of
| which is a NaN, and returns the appropriate NaN result. If either `a' or
| `b' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status)
{
int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
aIsNaN = float128_is_nan(a);
aIsSignalingNaN = float128_is_signaling_nan(a);
bIsNaN = float128_is_nan(b);
bIsSignalingNaN = float128_is_signaling_nan(b);
a.hi |= BX_CONST64(0x0000800000000000);
b.hi |= BX_CONST64(0x0000800000000000);
if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
if (aIsSignalingNaN) {
if (bIsSignalingNaN) goto returnLargerSignificand;
return bIsNaN ? b : a;
}
else if (aIsNaN) {
if (bIsSignalingNaN | !bIsNaN) return a;
returnLargerSignificand:
if (lt128(a.hi<<1, a.lo, b.hi<<1, b.lo)) return b;
if (lt128(b.hi<<1, b.lo, a.hi<<1, a.lo)) return a;
return (a.hi < b.hi) ? a : b;
}
else {
return b;
}
}
/*----------------------------------------------------------------------------
| The pattern for a default generated quadruple-precision NaN.
*----------------------------------------------------------------------------*/
const float128 float128_default_nan =
packFloat128(float128_default_nan_hi, float128_default_nan_lo);
#endif /* FLOAT128 */

View File

@@ -0,0 +1,788 @@
/*============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
#ifndef _SOFTFLOAT_SPECIALIZE_H_
#define _SOFTFLOAT_SPECIALIZE_H_
#include "softfloat.h"
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#define int16_indefinite ((Bit16s)0x8000)
#define int32_indefinite ((Bit32s)0x80000000)
#define int64_indefinite BX_CONST64(0x8000000000000000)
#define uint16_indefinite (0xffff)
#define uint32_indefinite (0xffffffff)
#define uint64_indefinite BX_CONST64(0xffffffffffffffff)
/*----------------------------------------------------------------------------
| Internal canonical NaN format.
*----------------------------------------------------------------------------*/
typedef struct {
int sign;
Bit64u hi, lo;
} commonNaNT;
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| The pattern for a default generated half-precision NaN.
*----------------------------------------------------------------------------*/
extern const float16 float16_default_nan;
#define float16_fraction extractFloat16Frac
#define float16_exp extractFloat16Exp
#define float16_sign extractFloat16Sign
/*----------------------------------------------------------------------------
| Returns the fraction bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit16u extractFloat16Frac(float16 a)
{
return a & 0x3FF;
}
/*----------------------------------------------------------------------------
| Returns the exponent bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit16s extractFloat16Exp(float16 a)
{
return (a>>10) & 0x1F;
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int extractFloat16Sign(float16 a)
{
return a>>15;
}
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| half-precision floating-point value, returning the result. After being
| shifted into the proper positions, the three fields are simply added
| together to form the result. This means that any integer portion of `zSig'
| will be added into the exponent. Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float16 packFloat16(int zSign, int zExp, Bit16u zSig)
{
return (((Bit16u) zSign)<<15) + (((Bit16u) zExp)<<10) + zSig;
}
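/* Illustrative usage sketch (not part of the original Bochs sources; the
   example function name is made up and the block is compiled out): packing
   +1.0 in half precision.  Sign 0, biased exponent 0x0F and a zero fraction
   add up to the encoding 0x3C00. */
#if 0
static void packFloat16_example(void)
{
    float16 one  = packFloat16(0, 0x0F, 0);   /* 0x3C00 == +1.0 */
    float16 half = packFloat16(0, 0x0E, 0);   /* 0x3800 == +0.5 */
    (void) one; (void) half;
}
#endif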
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float16_is_nan(float16 a)
{
return (0xF800 < (Bit16u) (a<<1));
}
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float16_is_signaling_nan(float16 a)
{
return (((a>>9) & 0x3F) == 0x3E) && (a & 0x1FF);
}
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is denormal;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float16_is_denormal(float16 a)
{
return (extractFloat16Exp(a) == 0) && (extractFloat16Frac(a) != 0);
}
/*----------------------------------------------------------------------------
| Convert float16 denormals to zero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float16 float16_denormal_to_zero(float16 a)
{
if (float16_is_denormal(a)) a &= 0x8000;
return a;
}
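/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): flushing half-precision denormals to zero while preserving the sign
   bit, as the helper above does. */
#if 0
static void float16_denormal_to_zero_example(void)
{
    float16 p = float16_denormal_to_zero(0x0001);  /* smallest +denormal -> +0 (0x0000) */
    float16 n = float16_denormal_to_zero(0x8001);  /* smallest -denormal -> -0 (0x8000) */
    (void) p; (void) n;
}
#endif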
/*----------------------------------------------------------------------------
| Returns the result of converting the half-precision floating-point NaN
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE commonNaNT float16ToCommonNaN(float16 a, struct float_status_t *status)
{
commonNaNT z;
if (float16_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
z.sign = a>>15;
z.lo = 0;
z.hi = ((Bit64u) a)<<54;
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the half-
| precision floating-point format.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float16 commonNaNToFloat16(commonNaNT a)
{
return (((Bit16u) a.sign)<<15) | 0x7E00 | (Bit16u)(a.hi>>54);
}
#endif
/*----------------------------------------------------------------------------
| Commonly used single-precision floating point constants
*----------------------------------------------------------------------------*/
extern const float32 float32_negative_inf;
extern const float32 float32_positive_inf;
extern const float32 float32_negative_zero;
extern const float32 float32_positive_zero;
extern const float32 float32_negative_one;
extern const float32 float32_positive_one;
extern const float32 float32_max_float;
extern const float32 float32_min_float;
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/
extern const float32 float32_default_nan;
#define float32_fraction extractFloat32Frac
#define float32_exp extractFloat32Exp
#define float32_sign extractFloat32Sign
#define FLOAT32_EXP_BIAS 0x7F
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
{
return a & 0x007FFFFF;
}
/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
{
return (a>>23) & 0xFF;
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int extractFloat32Sign(float32 a)
{
return a>>31;
}
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| single-precision floating-point value, returning the result. After being
| shifted into the proper positions, the three fields are simply added
| together to form the result. This means that any integer portion of `zSig'
| will be added into the exponent. Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float32 packFloat32(int zSign, Bit16s zExp, Bit32u zSig)
{
return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig;
}
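/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): packing single-precision values.  With a zero significand the biased
   exponent lands directly in bits 30..23. */
#if 0
static void packFloat32_example(void)
{
    float32 one     = packFloat32(0, 0x7F, 0);   /* 0x3F800000 == +1.0f */
    float32 neg_two = packFloat32(1, 0x80, 0);   /* 0xC0000000 == -2.0f */
    (void) one; (void) neg_two;
}
#endif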
/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float32_is_nan(float32 a)
{
return (0xFF000000 < (Bit32u) (a<<1));
}
/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float32_is_signaling_nan(float32 a)
{
return (((a>>22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF);
}
/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is denormal;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float32_is_denormal(float32 a)
{
return (extractFloat32Exp(a) == 0) && (extractFloat32Frac(a) != 0);
}
/*----------------------------------------------------------------------------
| Convert float32 denormals to zero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float32 float32_denormal_to_zero(float32 a)
{
if (float32_is_denormal(a)) a &= 0x80000000;
return a;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point NaN
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE commonNaNT float32ToCommonNaN(float32 a, struct float_status_t *status)
{
commonNaNT z;
if (float32_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
z.sign = a>>31;
z.lo = 0;
z.hi = ((Bit64u) a)<<41;
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the single-
| precision floating-point format.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float32 commonNaNToFloat32(commonNaNT a)
{
return (((Bit32u) a.sign)<<31) | 0x7FC00000 | (Bit32u)(a.hi>>41);
}
/*----------------------------------------------------------------------------
| Takes two single-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes single-precision floating-point NaN `a' and returns the appropriate
| NaN result. If `a' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float32 propagateFloat32NaNOne(float32 a, struct float_status_t *status)
{
if (float32_is_signaling_nan(a))
float_raise(status, float_flag_invalid);
return a | 0x00400000;
}
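/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): quieting a single-precision signaling NaN.  The quiet bit (bit 22) is
   ORed in and the invalid flag is raised in the supplied status. */
#if 0
static void propagateFloat32NaNOne_example(struct float_status_t *status)
{
    float32 q = propagateFloat32NaNOne(0x7F800001, status);  /* -> 0x7FC00001, invalid raised */
    (void) q;
}
#endif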
/*----------------------------------------------------------------------------
| Commonly used double-precision floating point constants
*----------------------------------------------------------------------------*/
extern const float64 float64_negative_inf;
extern const float64 float64_positive_inf;
extern const float64 float64_negative_zero;
extern const float64 float64_positive_zero;
extern const float64 float64_negative_one;
extern const float64 float64_positive_one;
extern const float64 float64_max_float;
extern const float64 float64_min_float;
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.
*----------------------------------------------------------------------------*/
extern const float64 float64_default_nan;
#define float64_fraction extractFloat64Frac
#define float64_exp extractFloat64Exp
#define float64_sign extractFloat64Sign
#define FLOAT64_EXP_BIAS 0x3FF
/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
{
return a & BX_CONST64(0x000FFFFFFFFFFFFF);
}
/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
{
return (Bit16s)(a>>52) & 0x7FF;
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int extractFloat64Sign(float64 a)
{
return (int)(a>>63);
}
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| double-precision floating-point value, returning the result. After being
| shifted into the proper positions, the three fields are simply added
| together to form the result. This means that any integer portion of `zSig'
| will be added into the exponent. Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float64 packFloat64(int zSign, Bit16s zExp, Bit64u zSig)
{
return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig;
}
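/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): packing +1.0 in double precision (biased exponent 0x3FF, zero
   significand). */
#if 0
static void packFloat64_example(void)
{
    float64 one = packFloat64(0, 0x3FF, 0);   /* 0x3FF0000000000000 == +1.0 */
    (void) one;
}
#endif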
/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float64_is_nan(float64 a)
{
return (BX_CONST64(0xFFE0000000000000) < (Bit64u) (a<<1));
}
/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float64_is_signaling_nan(float64 a)
{
return (((a>>51) & 0xFFF) == 0xFFE) && (a & BX_CONST64(0x0007FFFFFFFFFFFF));
}
/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is denormal;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float64_is_denormal(float64 a)
{
return (extractFloat64Exp(a) == 0) && (extractFloat64Frac(a) != 0);
}
/*----------------------------------------------------------------------------
| Convert float64 denormals to zero.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float64 float64_denormal_to_zero(float64 a)
{
if (float64_is_denormal(a)) a &= ((Bit64u)(1) << 63);
return a;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point NaN
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE commonNaNT float64ToCommonNaN(float64 a, struct float_status_t *status)
{
commonNaNT z;
if (float64_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
z.sign = (int)(a>>63);
z.lo = 0;
z.hi = a<<12;
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the double-
| precision floating-point format.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float64 commonNaNToFloat64(commonNaNT a)
{
return (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FF8000000000000) | (a.hi>>12);
}
/*----------------------------------------------------------------------------
| Takes two double-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes double-precision floating-point NaN `a' and returns the appropriate
| NaN result. If `a' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float64 propagateFloat64NaNOne(float64 a, struct float_status_t *status)
{
if (float64_is_signaling_nan(a))
float_raise(status, float_flag_invalid);
return a | BX_CONST64(0x0008000000000000);
}
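/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): quieting a double-precision signaling NaN by setting bit 51. */
#if 0
static void propagateFloat64NaNOne_example(struct float_status_t *status)
{
    /* 0x7FF0000000000001 is an sNaN; the result is the qNaN 0x7FF8000000000001. */
    float64 q = propagateFloat64NaNOne(BX_CONST64(0x7FF0000000000001), status);
    (void) q;
}
#endif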
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.  The
| `exp' and `fraction' values below hold the sign/exponent word and the
| significand, respectively.
*----------------------------------------------------------------------------*/
#define floatx80_default_nan_exp 0xFFFF
#define floatx80_default_nan_fraction BX_CONST64(0xC000000000000000)
#define floatx80_fraction extractFloatx80Frac
#define floatx80_exp extractFloatx80Exp
#define floatx80_sign extractFloatx80Sign
#define FLOATX80_EXP_BIAS 0x3FFF
/*----------------------------------------------------------------------------
| Returns the fraction bits of the extended double-precision floating-point
| value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u extractFloatx80Frac(floatx80 a)
{
return a.fraction;
}
/*----------------------------------------------------------------------------
| Returns the exponent bits of the extended double-precision floating-point
| value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit32s extractFloatx80Exp(floatx80 a)
{
return a.exp & 0x7FFF;
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the extended double-precision floating-point value
| `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int extractFloatx80Sign(floatx80 a)
{
return a.exp>>15;
}
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
| extended double-precision floating-point value, returning the result.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 packFloatx80(int zSign, Bit32s zExp, Bit64u zSig)
{
floatx80 z;
z.fraction = zSig;
z.exp = (zSign << 15) + zExp;
return z;
}
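/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): packing +1.0 in extended precision.  Unlike float32/float64, the
   integer bit of the significand is explicit, so it must be set here. */
#if 0
static void packFloatx80_example(void)
{
    floatx80 one = packFloatx80(0, 0x3FFF, BX_CONST64(0x8000000000000000));
    (void) one;   /* exp == 0x3FFF, fraction == 0x8000000000000000 -> +1.0 */
}
#endif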
/*----------------------------------------------------------------------------
| Returns 1 if the extended double-precision floating-point value `a' is a
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int floatx80_is_nan(floatx80 a)
{
return ((a.exp & 0x7FFF) == 0x7FFF) && (Bit64s) (a.fraction<<1);
}
/*----------------------------------------------------------------------------
| Returns 1 if the extended double-precision floating-point value `a' is a
| signaling NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int floatx80_is_signaling_nan(floatx80 a)
{
Bit64u aLow = a.fraction & ~BX_CONST64(0x4000000000000000);
return ((a.exp & 0x7FFF) == 0x7FFF) &&
((Bit64u) (aLow<<1)) && (a.fraction == aLow);
}
/*----------------------------------------------------------------------------
| Returns 1 if the extended double-precision floating-point value `a' is an
| unsupported encoding (nonzero exponent with the explicit integer bit clear);
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int floatx80_is_unsupported(floatx80 a)
{
return ((a.exp & 0x7FFF) && !(a.fraction & BX_CONST64(0x8000000000000000)));
}
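/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): an "unnormal" encoding (nonzero exponent, explicit integer bit clear)
   is flagged as unsupported, while a normal +1.0 is not. */
#if 0
static void floatx80_is_unsupported_example(void)
{
    floatx80 unnormal = packFloatx80(0, 0x3FFF, BX_CONST64(0x4000000000000000));
    floatx80 one      = packFloatx80(0, 0x3FFF, BX_CONST64(0x8000000000000000));
    int u = floatx80_is_unsupported(unnormal);   /* 1 */
    int n = floatx80_is_unsupported(one);        /* 0 */
    (void) u; (void) n;
}
#endif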
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
| invalid exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE commonNaNT floatx80ToCommonNaN(floatx80 a, struct float_status_t *status)
{
commonNaNT z;
if (floatx80_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
z.sign = a.exp >> 15;
z.lo = 0;
z.hi = a.fraction << 1;
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the extended
| double-precision floating-point format.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 commonNaNToFloatx80(commonNaNT a)
{
floatx80 z;
z.fraction = BX_CONST64(0xC000000000000000) | (a.hi>>1);
z.exp = (((Bit16u) a.sign)<<15) | 0x7FFF;
return z;
}
/*----------------------------------------------------------------------------
| Takes two extended double-precision floating-point values `a' and `b', one
| of which is a NaN, and returns the appropriate NaN result. If either `a' or
| `b' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Takes extended double-precision floating-point NaN `a' and returns the
| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
| is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 propagateFloatx80NaNOne(floatx80 a, struct float_status_t *status)
{
if (floatx80_is_signaling_nan(a))
float_raise(status, float_flag_invalid);
a.fraction |= BX_CONST64(0xC000000000000000);
return a;
}
#endif /* FLOATX80 */
#ifdef FLOAT128
#include "softfloat-macros.h"
/*----------------------------------------------------------------------------
| The pattern for a default generated quadruple-precision NaN. The `high' and
| `low' values hold the most- and least-significant bits, respectively.
*----------------------------------------------------------------------------*/
#define float128_default_nan_hi BX_CONST64(0xFFFF800000000000)
#define float128_default_nan_lo BX_CONST64(0x0000000000000000)
#define float128_exp extractFloat128Exp
/*----------------------------------------------------------------------------
| Returns the least-significant 64 fraction bits of the quadruple-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u extractFloat128Frac1(float128 a)
{
return a.lo;
}
/*----------------------------------------------------------------------------
| Returns the most-significant 48 fraction bits of the quadruple-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u extractFloat128Frac0(float128 a)
{
return a.hi & BX_CONST64(0x0000FFFFFFFFFFFF);
}
/*----------------------------------------------------------------------------
| Returns the exponent bits of the quadruple-precision floating-point value
| `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit32s extractFloat128Exp(float128 a)
{
return ((Bit32s)(a.hi>>48)) & 0x7FFF;
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the quadruple-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int extractFloat128Sign(float128 a)
{
return (int)(a.hi >> 63);
}
/*----------------------------------------------------------------------------
| Packs the sign `zSign', the exponent `zExp', and the significand formed
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
| floating-point value, returning the result. After being shifted into the
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
| added together to form the most significant 64 bits of the result. This
| means that any integer portion of `zSig0' will be added into the exponent.
| Since a properly normalized significand will have an integer portion equal
| to 1, the `zExp' input should be 1 less than the desired result exponent
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float128 packFloat128Four(int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1)
{
float128 z;
z.lo = zSig1;
z.hi = (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<48) + zSig0;
return z;
}
/*----------------------------------------------------------------------------
| Packs two 64-bit integers into the quadruple-precision
| floating-point value, returning the result.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float128 packFloat128(Bit64u zHi, Bit64u zLo)
{
float128 z;
z.lo = zLo;
z.hi = zHi;
return z;
}
#ifdef _MSC_VER
#define PACK_FLOAT_128(hi,lo) { lo, hi }
#else
#define PACK_FLOAT_128(hi,lo) packFloat128(BX_CONST64(hi),BX_CONST64(lo))
#endif
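/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): building +1.0 in quadruple precision.  packFloat128Four() places the
   sign and the 15-bit exponent in the top 16 bits of `hi'. */
#if 0
static void packFloat128_example(void)
{
    float128 one = packFloat128Four(0, 0x3FFF, 0, 0);
    /* one.hi == 0x3FFF000000000000, one.lo == 0 */
    (void) one;
}
#endif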
/*----------------------------------------------------------------------------
| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float128_is_nan(float128 a)
{
return (BX_CONST64(0xFFFE000000000000) <= (Bit64u) (a.hi<<1))
&& (a.lo || (a.hi & BX_CONST64(0x0000FFFFFFFFFFFF)));
}
/*----------------------------------------------------------------------------
| Returns 1 if the quadruple-precision floating-point value `a' is a
| signaling NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float128_is_signaling_nan(float128 a)
{
return (((a.hi>>47) & 0xFFFF) == 0xFFFE)
&& (a.lo || (a.hi & BX_CONST64(0x00007FFFFFFFFFFF)));
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point NaN
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE commonNaNT float128ToCommonNaN(float128 a, struct float_status_t *status)
{
commonNaNT z;
if (float128_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
z.sign = (int)(a.hi>>63);
shortShift128Left(a.hi, a.lo, 16, &z.hi, &z.lo);
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the quadruple-
| precision floating-point format.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE float128 commonNaNToFloat128(commonNaNT a)
{
float128 z;
shift128Right(a.hi, a.lo, 16, &z.hi, &z.lo);
z.hi |= (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FFF800000000000);
return z;
}
/*----------------------------------------------------------------------------
| Takes two quadruple-precision floating-point values `a' and `b', one of
| which is a NaN, and returns the appropriate NaN result. If either `a' or
| `b' is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| The pattern for a default generated quadruple-precision NaN.
*----------------------------------------------------------------------------*/
extern const float128 float128_default_nan;
#endif /* FLOAT128 */
#endif

File diff suppressed because it is too large

View File

@@ -0,0 +1,488 @@
/*============================================================================
This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "config.h" /* generated by configure script from config.h.in */
#ifndef _SOFTFLOAT_H_
#define _SOFTFLOAT_H_
#define FLOAT16
#define FLOATX80
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point types.
*----------------------------------------------------------------------------*/
#ifdef FLOAT16
typedef Bit16u float16;
#endif
typedef Bit32u float32;
typedef Bit64u float64;
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point class.
*----------------------------------------------------------------------------*/
typedef enum {
float_zero,
float_SNaN,
float_QNaN,
float_negative_inf,
float_positive_inf,
float_denormal,
float_normalized
} float_class_t;
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point NaN operands handling mode.
*----------------------------------------------------------------------------*/
enum float_nan_handling_mode_t {
float_larger_significand_nan = 0, // this mode used by x87 FPU
float_first_operand_nan = 1 // this mode used by SSE
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point rounding mode.
*----------------------------------------------------------------------------*/
enum float_round_t {
float_round_nearest_even = 0,
float_round_down = 1,
float_round_up = 2,
float_round_to_zero = 3
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point exception flags.
*----------------------------------------------------------------------------*/
enum float_exception_flag_t {
float_flag_invalid = 0x01,
float_flag_denormal = 0x02,
float_flag_divbyzero = 0x04,
float_flag_overflow = 0x08,
float_flag_underflow = 0x10,
float_flag_inexact = 0x20
};
extern const unsigned float_all_exceptions_mask;
#ifdef FLOATX80
#define RAISE_SW_C1 0x0200
#endif
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point ordering relations
*----------------------------------------------------------------------------*/
enum {
float_relation_less = -1,
float_relation_equal = 0,
float_relation_greater = 1,
float_relation_unordered = 2
};
/*----------------------------------------------------------------------------
| Options to indicate which negations to perform in float*_muladd()
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = float_muladd_negate_c | float_muladd_negate_product
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point status structure.
*----------------------------------------------------------------------------*/
struct float_status_t
{
#ifdef FLOATX80
int float_rounding_precision; /* floatx80 only */
#endif
int float_rounding_mode;
int float_exception_flags;
int float_exception_masks;
int float_suppress_exception;
int float_nan_handling_mode; /* flag register */
int flush_underflow_to_zero; /* flag register */
int denormals_are_zeros; /* flag register */
};
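/* Illustrative sketch (not part of the original Bochs sources, compiled out):
   one plausible x87-style initialization of the status word.  The 80-bit
   value for float_rounding_precision follows a 32/64/80 convention assumed
   here; only the field and enum names come from the declarations above. */
#if 0
static struct float_status_t example_x87_status(void)
{
    struct float_status_t status;
    status.float_rounding_precision = 80;                        /* full extended precision (assumed convention) */
    status.float_rounding_mode      = float_round_nearest_even;
    status.float_exception_flags    = 0;
    status.float_exception_masks    = float_all_exceptions_mask; /* all exceptions masked */
    status.float_suppress_exception = 0;
    status.float_nan_handling_mode  = float_larger_significand_nan; /* x87 behaviour */
    status.flush_underflow_to_zero  = 0;
    status.denormals_are_zeros      = 0;
    return status;
}
#endif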
/*----------------------------------------------------------------------------
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void float_raise(struct float_status_t *status, int flags)
{
status->float_exception_flags |= flags;
}
/*----------------------------------------------------------------------------
| Returns raised IEC/IEEE floating-point exception flags.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int get_exception_flags(const struct float_status_t *status)
{
return status->float_exception_flags & ~status->float_suppress_exception;
}
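/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): flags accumulate via float_raise() and suppressed flags are filtered
   out by get_exception_flags(). */
#if 0
static void exception_flags_example(struct float_status_t *status)
{
    status->float_exception_flags    = 0;
    status->float_suppress_exception = float_flag_denormal;
    float_raise(status, float_flag_inexact | float_flag_denormal);
    /* get_exception_flags(status) == float_flag_inexact: denormal is suppressed */
}
#endif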
/*----------------------------------------------------------------------------
| Routine to check if any or all of the software IEC/IEEE floating-point
| exceptions are masked.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int float_exception_masked(const struct float_status_t *status, int flag)
{
return status->float_exception_masks & flag;
}
/*----------------------------------------------------------------------------
| Returns current floating point rounding mode specified by status word.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int get_float_rounding_mode(const struct float_status_t *status)
{
return status->float_rounding_mode;
}
/*----------------------------------------------------------------------------
| Returns current floating point rounding precision (floatx80 only).
*----------------------------------------------------------------------------*/
#ifdef FLOATX80
BX_CPP_INLINE int get_float_rounding_precision(const struct float_status_t *status)
{
return status->float_rounding_precision;
}
#endif
/*----------------------------------------------------------------------------
| Returns current floating point NaN operands handling mode specified
| by status word.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int get_float_nan_handling_mode(const struct float_status_t *status)
{
return status->float_nan_handling_mode;
}
/*----------------------------------------------------------------------------
| Raise the floating point rounded-up indication (x87 C1 flag, floatx80 only).
*----------------------------------------------------------------------------*/
#ifdef FLOATX80
BX_CPP_INLINE void set_float_rounding_up(struct float_status_t *status)
{
status->float_exception_flags |= RAISE_SW_C1;
}
#endif
/*----------------------------------------------------------------------------
| Returns 1 if the <denormals-are-zeros> feature is enabled;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int get_denormals_are_zeros(const struct float_status_t *status)
{
return status->denormals_are_zeros;
}
/*----------------------------------------------------------------------------
| Returns 1 if the <flush-underflow-to-zero> feature is enabled;
| otherwise returns 0.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE int get_flush_underflow_to_zero(const struct float_status_t *status)
{
return status->flush_underflow_to_zero;
}
/*----------------------------------------------------------------------------
| Software IEC/IEEE integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
float32 int32_to_float32(Bit32s, struct float_status_t *status);
float64 int32_to_float64(Bit32s);
float32 int64_to_float32(Bit64s, struct float_status_t *status);
float64 int64_to_float64(Bit64s, struct float_status_t *status);
float32 uint32_to_float32(Bit32u, struct float_status_t *status);
float64 uint32_to_float64(Bit32u);
float32 uint64_to_float32(Bit64u, struct float_status_t *status);
float64 uint64_to_float64(Bit64u, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision conversion routines.
*----------------------------------------------------------------------------*/
Bit32s float32_to_int32(float32, struct float_status_t *status);
Bit32s float32_to_int32_round_to_zero(float32, struct float_status_t *status);
Bit64s float32_to_int64(float32, struct float_status_t *status);
Bit64s float32_to_int64_round_to_zero(float32, struct float_status_t *status);
Bit32u float32_to_uint32(float32, struct float_status_t *status);
Bit32u float32_to_uint32_round_to_zero(float32, struct float_status_t *status);
Bit64u float32_to_uint64(float32, struct float_status_t *status);
Bit64u float32_to_uint64_round_to_zero(float32, struct float_status_t *status);
float64 float32_to_float64(float32, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision operations.
*----------------------------------------------------------------------------*/
float32 float32_round_to_int(float32, Bit8u scale, struct float_status_t *status);
float32 float32_add(float32, float32, struct float_status_t *status);
float32 float32_sub(float32, float32, struct float_status_t *status);
float32 float32_mul(float32, float32, struct float_status_t *status);
float32 float32_div(float32, float32, struct float_status_t *status);
float32 float32_sqrt(float32, struct float_status_t *status);
float32 float32_frc(float32, struct float_status_t *status);
float32 float32_muladd(float32, float32, float32, int flags, struct float_status_t *status);
float32 float32_scalef(float32, float32, struct float_status_t *status);
int float32_compare(float32, float32, int quiet, struct float_status_t *status);
BX_CPP_INLINE float32 float32_round_to_int_one(float32 a, struct float_status_t *status)
{
return float32_round_to_int(a, 0, status);
}
BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, struct float_status_t *status)
{
return float32_muladd(a, b, c, 0, status);
}
BX_CPP_INLINE float32 float32_fmsub(float32 a, float32 b, float32 c, struct float_status_t *status)
{
return float32_muladd(a, b, c, float_muladd_negate_c, status);
}
BX_CPP_INLINE float32 float32_fnmadd(float32 a, float32 b, float32 c, struct float_status_t *status)
{
return float32_muladd(a, b, c, float_muladd_negate_product, status);
}
BX_CPP_INLINE float32 float32_fnmsub(float32 a, float32 b, float32 c, struct float_status_t *status)
{
return float32_muladd(a, b, c, float_muladd_negate_result, status);
}
BX_CPP_INLINE int float32_compare_two(float32 a, float32 b, struct float_status_t *status)
{
return float32_compare(a, b, 0, status);
}
BX_CPP_INLINE int float32_compare_quiet(float32 a, float32 b, struct float_status_t *status)
{
return float32_compare(a, b, 1, status);
}
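/* Illustrative usage sketch (not part of the original sources, compiled out):
   the compare helpers return one of the float_relation_* values defined
   above.  The raw encodings for 1.0f and 2.0f are written out by hand. */
#if 0
static void float32_compare_example(struct float_status_t *status)
{
    int r = float32_compare_two(0x3F800000 /* 1.0f */, 0x40000000 /* 2.0f */, status);
    /* r == float_relation_less */
    (void) r;
}
#endif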
float_class_t float32_class(float32);
float32 float32_min(float32 a, float32 b, struct float_status_t *status);
float32 float32_max(float32 a, float32 b, struct float_status_t *status);
float32 float32_minmax(float32 a, float32 b, int is_max, int is_abs, struct float_status_t *status);
float32 float32_getexp(float32 a, struct float_status_t *status);
float32 float32_getmant(float32 a, struct float_status_t *status, int sign_ctrl, int interv);
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
Bit32s float64_to_int32(float64, struct float_status_t *status);
Bit32s float64_to_int32_round_to_zero(float64, struct float_status_t *status);
Bit64s float64_to_int64(float64, struct float_status_t *status);
Bit64s float64_to_int64_round_to_zero(float64, struct float_status_t *status);
Bit32u float64_to_uint32(float64, struct float_status_t *status);
Bit32u float64_to_uint32_round_to_zero(float64, struct float_status_t *status);
Bit64u float64_to_uint64(float64, struct float_status_t *status);
Bit64u float64_to_uint64_round_to_zero(float64, struct float_status_t *status);
float32 float64_to_float32(float64, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision operations.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int(float64, Bit8u scale, struct float_status_t *status);
float64 float64_add(float64, float64, struct float_status_t *status);
float64 float64_sub(float64, float64, struct float_status_t *status);
float64 float64_mul(float64, float64, struct float_status_t *status);
float64 float64_div(float64, float64, struct float_status_t *status);
float64 float64_sqrt(float64, struct float_status_t *status);
float64 float64_frc(float64, struct float_status_t *status);
float64 float64_muladd(float64, float64, float64, int flags, struct float_status_t *status);
float64 float64_scalef(float64, float64, struct float_status_t *status);
int float64_compare(float64, float64, int quiet, struct float_status_t *status);
BX_CPP_INLINE float64 float64_round_to_int_one(float64 a, struct float_status_t *status)
{
return float64_round_to_int(a, 0, status);
}
BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, struct float_status_t *status)
{
return float64_muladd(a, b, c, 0, status);
}
BX_CPP_INLINE float64 float64_fmsub(float64 a, float64 b, float64 c, struct float_status_t *status)
{
return float64_muladd(a, b, c, float_muladd_negate_c, status);
}
BX_CPP_INLINE float64 float64_fnmadd(float64 a, float64 b, float64 c, struct float_status_t *status)
{
return float64_muladd(a, b, c, float_muladd_negate_product, status);
}
BX_CPP_INLINE float64 float64_fnmsub(float64 a, float64 b, float64 c, struct float_status_t *status)
{
return float64_muladd(a, b, c, float_muladd_negate_result, status);
}
BX_CPP_INLINE int float64_compare_two(float64 a, float64 b, struct float_status_t *status)
{
return float64_compare(a, b, 0, status);
}
BX_CPP_INLINE int float64_compare_quiet(float64 a, float64 b, struct float_status_t *status)
{
return float64_compare(a, b, 1, status);
}
float_class_t float64_class(float64);
float64 float64_min(float64 a, float64 b, struct float_status_t *status);
float64 float64_max(float64 a, float64 b, struct float_status_t *status);
float64 float64_minmax(float64 a, float64 b, int is_max, int is_abs, struct float_status_t *status);
float64 float64_getexp(float64 a, struct float_status_t *status);
float64 float64_getmant(float64 a, struct float_status_t *status, int sign_ctrl, int interv);
#ifdef FLOAT16
float32 float16_to_float32(float16, struct float_status_t *status);
float16 float32_to_float16(float32, struct float_status_t *status);
float_class_t float16_class(float16);
#endif
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point types.
*----------------------------------------------------------------------------*/
#ifdef BX_BIG_ENDIAN
typedef struct floatx80 { // leave alignment to compiler
Bit16u exp;
Bit64u fraction;
} floatx80;
#else
typedef struct floatx80 {
Bit64u fraction;
Bit16u exp;
} floatx80;
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------
| Software IEC/IEEE integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
floatx80 int32_to_floatx80(Bit32s);
floatx80 int64_to_floatx80(Bit64s);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision conversion routines.
*----------------------------------------------------------------------------*/
floatx80 float32_to_floatx80(float32, struct float_status_t *status);
floatx80 float64_to_floatx80(float64, struct float_status_t *status);
Bit32s floatx80_to_int32(floatx80, struct float_status_t *status);
Bit32s floatx80_to_int32_round_to_zero(floatx80, struct float_status_t *status);
Bit64s floatx80_to_int64(floatx80, struct float_status_t *status);
Bit64s floatx80_to_int64_round_to_zero(floatx80, struct float_status_t *status);
float32 floatx80_to_float32(floatx80, struct float_status_t *status);
float64 floatx80_to_float64(floatx80, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
*----------------------------------------------------------------------------*/
floatx80 floatx80_round_to_int(floatx80, struct float_status_t *status);
floatx80 floatx80_add(floatx80, floatx80, struct float_status_t *status);
floatx80 floatx80_sub(floatx80, floatx80, struct float_status_t *status);
floatx80 floatx80_mul(floatx80, floatx80, struct float_status_t *status);
floatx80 floatx80_div(floatx80, floatx80, struct float_status_t *status);
floatx80 floatx80_sqrt(floatx80, struct float_status_t *status);
float_class_t floatx80_class(floatx80);
#ifdef __cplusplus
}
#endif
#endif /* FLOATX80 */
#ifdef FLOAT128
#ifdef BX_BIG_ENDIAN
typedef struct float128 {
Bit64u hi, lo;
} float128;
#else
typedef struct float128 {
Bit64u lo, hi;
} float128;
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------
| Software IEC/IEEE quadruple-precision conversion routines.
*----------------------------------------------------------------------------*/
float128 floatx80_to_float128(floatx80 a, struct float_status_t *status);
floatx80 float128_to_floatx80(float128 a, struct float_status_t *status);
float128 int64_to_float128(Bit64s a);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
*----------------------------------------------------------------------------*/
floatx80 floatx80_128_mul(floatx80 a, float128 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE quadruple-precision operations.
*----------------------------------------------------------------------------*/
float128 float128_add(float128 a, float128 b, struct float_status_t *status);
float128 float128_sub(float128 a, float128 b, struct float_status_t *status);
float128 float128_mul(float128 a, float128 b, struct float_status_t *status);
float128 float128_div(float128 a, float128 b, struct float_status_t *status);
#ifdef __cplusplus
}
#endif
#endif /* FLOAT128 */
#endif

View File

@@ -0,0 +1,129 @@
/*============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloat.h"
#ifdef FLOAT16
#include "softfloat-round-pack.h"
#include "softfloat-specialize.h"
#include "softfloat-macros.h"
/*----------------------------------------------------------------------------
| Determine half-precision floating-point number class
*----------------------------------------------------------------------------*/
float_class_t float16_class(float16 a)
{
Bit16s aExp = extractFloat16Exp(a);
Bit16u aSig = extractFloat16Frac(a);
int aSign = extractFloat16Sign(a);
if(aExp == 0x1F) {
if (aSig == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return (aSig & 0x200) ? float_QNaN : float_SNaN;
}
if(aExp == 0) {
if (aSig == 0) return float_zero;
return float_denormal;
}
return float_normalized;
}
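/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): a few half-precision encodings and the class reported for them. */
#if 0
static void float16_class_example(void)
{
    float_class_t a = float16_class(0x3C00);   /* +1.0      -> float_normalized   */
    float_class_t b = float16_class(0x0001);   /* denormal  -> float_denormal     */
    float_class_t c = float16_class(0xFC00);   /* -infinity -> float_negative_inf */
    float_class_t d = float16_class(0x7E00);   /* quiet NaN -> float_QNaN         */
    (void) a; (void) b; (void) c; (void) d;
}
#endif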
/*----------------------------------------------------------------------------
| Returns the result of converting the half-precision floating-point value
| `a' to the single-precision floating-point format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/
float32 float16_to_float32(float16 a, struct float_status_t *status)
{
Bit16u aSig = extractFloat16Frac(a);
Bit16s aExp = extractFloat16Exp(a);
int aSign = extractFloat16Sign(a);
if (aExp == 0x1F) {
if (aSig) return commonNaNToFloat32(float16ToCommonNaN(a, status));
return packFloat32(aSign, 0xFF, 0);
}
if (aExp == 0) {
// ignore denormals_are_zeros flag
if (aSig == 0) return packFloat32(aSign, 0, 0);
float_raise(status, float_flag_denormal);
normalizeFloat16Subnormal(aSig, &aExp, &aSig);
--aExp;
}
return packFloat32(aSign, aExp + 0x70, ((Bit32u) aSig)<<13);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the half-precision floating-point format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/
float16 float32_to_float16(float32 a, struct float_status_t *status)
{
Bit32u aSig = extractFloat32Frac(a);
Bit16s aExp = extractFloat32Exp(a);
int aSign = extractFloat32Sign(a);
if (aExp == 0xFF) {
if (aSig) return commonNaNToFloat16(float32ToCommonNaN(a, status));
return packFloat16(aSign, 0x1F, 0);
}
if (aExp == 0) {
if (get_denormals_are_zeros(status)) aSig = 0;
if (aSig == 0) return packFloat16(aSign, 0, 0);
float_raise(status, float_flag_denormal);
}
aSig = shift32RightJamming(aSig, 9);
Bit16u zSig = (Bit16u) aSig;
if (aExp || zSig) {
zSig |= 0x4000;
aExp -= 0x71;
}
return roundAndPackFloat16(aSign, aExp, zSig, status);
}
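/* Illustrative usage sketch (not part of the original Bochs sources, compiled
   out): values exactly representable in half precision round-trip through the
   two conversions above. */
#if 0
static void float16_conversion_example(struct float_status_t *status)
{
    float16 h = float32_to_float16(0x3F800000 /* 1.0f */, status);   /* 0x3C00 */
    float32 f = float16_to_float32(h, status);                       /* back to 0x3F800000 */
    (void) f;
}
#endif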
#endif

View File

@@ -0,0 +1,367 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#include "softfloatx80.h"
#include "softfloat-round-pack.h"
#include "softfloat-macros.h"
const floatx80 Const_QNaN = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
const floatx80 Const_Z = packFloatx80(0, 0x0000, 0);
const floatx80 Const_1 = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
const floatx80 Const_L2T = packFloatx80(0, 0x4000, BX_CONST64(0xd49a784bcd1b8afe));
const floatx80 Const_L2E = packFloatx80(0, 0x3fff, BX_CONST64(0xb8aa3b295c17f0bc));
const floatx80 Const_PI = packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
const floatx80 Const_LG2 = packFloatx80(0, 0x3ffd, BX_CONST64(0x9a209a84fbcff799));
const floatx80 Const_LN2 = packFloatx80(0, 0x3ffe, BX_CONST64(0xb17217f7d1cf79ac));
const floatx80 Const_INF = packFloatx80(0, 0x7fff, BX_CONST64(0x8000000000000000));
/*----------------------------------------------------------------------------
| Commonly used single-precision floating point constants
*----------------------------------------------------------------------------*/
const float32 float32_negative_inf = 0xff800000;
const float32 float32_positive_inf = 0x7f800000;
const float32 float32_negative_zero = 0x80000000;
const float32 float32_positive_zero = 0x00000000;
const float32 float32_negative_one = 0xbf800000;
const float32 float32_positive_one = 0x3f800000;
const float32 float32_max_float = 0x7f7fffff;
const float32 float32_min_float = 0xff7fffff;
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/
const float32 float32_default_nan = 0xffc00000;
/*----------------------------------------------------------------------------
| Commonly used single-precision floating point constants
*----------------------------------------------------------------------------*/
const float64 float64_negative_inf = BX_CONST64(0xfff0000000000000);
const float64 float64_positive_inf = BX_CONST64(0x7ff0000000000000);
const float64 float64_negative_zero = BX_CONST64(0x8000000000000000);
const float64 float64_positive_zero = BX_CONST64(0x0000000000000000);
const float64 float64_negative_one = BX_CONST64(0xbff0000000000000);
const float64 float64_positive_one = BX_CONST64(0x3ff0000000000000);
const float64 float64_max_float = BX_CONST64(0x7fefffffffffffff);
const float64 float64_min_float = BX_CONST64(0xffefffffffffffff);
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.
*----------------------------------------------------------------------------*/
const float64 float64_default_nan = BX_CONST64(0xFFF8000000000000);
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 16-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic - which means in particular that the conversion
| is rounded according to the current rounding mode. If `a' is a NaN or the
| conversion overflows, the integer indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit16s floatx80_to_int16(floatx80 a, struct float_status_t *status)
{
if (floatx80_is_unsupported(a)) {
float_raise(status, float_flag_invalid);
return int16_indefinite;
}
Bit32s v32 = floatx80_to_int32(a, status);
if ((v32 > 32767) || (v32 < -32768)) {
status->float_exception_flags = float_flag_invalid; // throw away other flags
return int16_indefinite;
}
return (Bit16s) v32;
}
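/*----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs sources; the
| *_example helper name is hypothetical and the status word is assumed to be
| supplied by the caller. An in-range value is simply rounded to a 16-bit
| integer, while an out-of-range value comes back as int16_indefinite with
| only the invalid flag left set; the round-to-zero variant below behaves the
| same way apart from the rounding.
*----------------------------------------------------------------------------*/
static void floatx80_to_int16_example(struct float_status_t *status)
{
floatx80 small = int32_to_floatx80(-1234); /* fits in 16 bits */
floatx80 big = int32_to_floatx80(100000); /* does not fit in 16 bits */
Bit16s in_range = floatx80_to_int16(small, status); /* -1234 */
Bit16s saturated = floatx80_to_int16(big, status); /* int16_indefinite, invalid raised */
(void) in_range;
(void) saturated;
}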
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 16-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero. If `a' is a NaN or the conversion overflows, the integer
| indefinite value is returned.
*----------------------------------------------------------------------------*/
Bit16s floatx80_to_int16_round_to_zero(floatx80 a, struct float_status_t *status)
{
if (floatx80_is_unsupported(a)) {
float_raise(status, float_flag_invalid);
return int16_indefinite;
}
Bit32s v32 = floatx80_to_int32_round_to_zero(a, status);
if ((v32 > 32767) || (v32 < -32768)) {
status->float_exception_flags = float_flag_invalid; // throw away other flags
return int16_indefinite;
}
return (Bit16s) v32;
}
/*----------------------------------------------------------------------------
| Separate the source extended double-precision floating point value `a'
| into its exponent and significand, store the significand back into
| 'a' and return the exponent. The operation performed is a superset of
| the IEC/IEEE recommended logb(x) function.
*----------------------------------------------------------------------------*/
floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit64u aSig = extractFloatx80Frac(*a);
Bit32s aExp = extractFloatx80Exp(*a);
int aSign = extractFloatx80Sign(*a);
if (floatx80_is_unsupported(*a))
{
float_raise(status, float_flag_invalid);
*a = floatx80_default_nan;
return *a;
}
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1))
{
*a = propagateFloatx80NaNOne(*a, status);
return *a;
}
return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0)
{
if (aSig == 0) {
float_raise(status, float_flag_divbyzero);
*a = packFloatx80(aSign, 0, 0);
return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
a->exp = (aSign << 15) + 0x3FFF;
a->fraction = aSig;
return int32_to_floatx80(aExp - 0x3FFF);
}
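/*----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs sources; the
| *_example helper name is hypothetical. floatx80_extract() is the
| FXTRACT-style split: for x = 1.5 * 2^2 = 6.0 the returned value is the
| unbiased exponent 2.0 and `x' itself is rewritten as the significand 1.5.
*----------------------------------------------------------------------------*/
static void floatx80_extract_example(struct float_status_t *status)
{
floatx80 x = int32_to_floatx80(6); /* 6.0 = 1.5 * 2^2 */
floatx80 e = floatx80_extract(&x, status); /* e == 2.0, x becomes 1.5 */
(void) e;
}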
/*----------------------------------------------------------------------------
| Scales extended double-precision floating-point value in operand `a' by
| value `b'. The function truncates the value in the second operand 'b' to
| an integral value and adds that value to the exponent of the operand 'a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status)
{
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
Bit32s aExp, bExp;
Bit64u aSig, bSig;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
{
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
aSig = extractFloatx80Frac(a);
aExp = extractFloatx80Exp(a);
int aSign = extractFloatx80Sign(a);
bSig = extractFloatx80Frac(b);
bExp = extractFloatx80Exp(b);
int bSign = extractFloatx80Sign(b);
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
{
return propagateFloatx80NaN(a, b, status);
}
if ((bExp == 0x7FFF) && bSign) {
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
return a;
}
if (bExp == 0x7FFF) {
if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
if ((aExp | aSig) == 0) {
if (! bSign) {
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
return a;
}
if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
if (bSign) return packFloatx80(aSign, 0, 0);
return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0) {
if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
if (aSig == 0) return a;
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
if (bExp < 0x3FFF)
return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status);
}
if (bExp == 0) {
if (bSig == 0) return a;
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
}
if (bExp > 0x400E) {
/* generate appropriate overflow/underflow */
return roundAndPackFloatx80(80, aSign,
bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
}
if (bExp < 0x3FFF) return a;
int shiftCount = 0x403E - bExp;
bSig >>= shiftCount;
Bit32s scale = (Bit32s) bSig;
if (bSign) scale = -scale; /* -32768..32767 */
return
roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
}
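/*----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs sources; the
| *_example helper name is hypothetical. FSCALE semantics: the second operand
| is chopped to an integer and added to the exponent of the first, so
| scale(3.0, 4.0) is 3.0 * 2^4 = 48.0.
*----------------------------------------------------------------------------*/
static floatx80 floatx80_scale_example(struct float_status_t *status)
{
floatx80 a = int32_to_floatx80(3);
floatx80 b = int32_to_floatx80(4);
return floatx80_scale(a, b, status); /* expected: 48.0 */
}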
/*----------------------------------------------------------------------------
| Determine extended-precision floating-point number class.
*----------------------------------------------------------------------------*/
float_class_t floatx80_class(floatx80 a)
{
Bit32s aExp = extractFloatx80Exp(a);
Bit64u aSig = extractFloatx80Frac(a);
if(aExp == 0) {
if (aSig == 0)
return float_zero;
/* denormal or pseudo-denormal */
return float_denormal;
}
/* valid numbers have the MS bit set */
if (!(aSig & BX_CONST64(0x8000000000000000)))
return float_SNaN; /* report unsupported as SNaNs */
if(aExp == 0x7fff) {
int aSign = extractFloatx80Sign(a);
if (((Bit64u) (aSig<< 1)) == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return (aSig & BX_CONST64(0x4000000000000000)) ? float_QNaN : float_SNaN;
}
return float_normalized;
}
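/*----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs sources; the
| *_example helper name is hypothetical. Using the constants defined at the
| top of this file: zero, infinity and an ordinary finite value land in the
| expected classes.
*----------------------------------------------------------------------------*/
static void floatx80_class_example(void)
{
float_class_t c0 = floatx80_class(Const_Z); /* float_zero */
float_class_t c1 = floatx80_class(Const_INF); /* float_positive_inf */
float_class_t c2 = floatx80_class(Const_1); /* float_normalized */
(void) c0; (void) c1; (void) c2;
}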
/*----------------------------------------------------------------------------
| Compare between two extended precision floating point numbers. Returns
| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
| the value 'a' is less than the corresponding value `b',
| 'float_relation_greater' if the value 'a' is greater than the corresponding
| value `b', or 'float_relation_unordered' otherwise.
*----------------------------------------------------------------------------*/
int floatx80_compare(floatx80 a, floatx80 b, int quiet, struct float_status_t *status)
{
float_class_t aClass = floatx80_class(a);
float_class_t bClass = floatx80_class(b);
if (aClass == float_SNaN || bClass == float_SNaN)
{
/* unsupported reported as SNaN */
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_QNaN || bClass == float_QNaN) {
if (! quiet) float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}
int aSign = extractFloatx80Sign(a);
int bSign = extractFloatx80Sign(b);
if (aClass == float_zero) {
if (bClass == float_zero) return float_relation_equal;
return bSign ? float_relation_greater : float_relation_less;
}
if (bClass == float_zero || aSign != bSign) {
return aSign ? float_relation_less : float_relation_greater;
}
Bit64u aSig = extractFloatx80Frac(a);
Bit32s aExp = extractFloatx80Exp(a);
Bit64u bSig = extractFloatx80Frac(b);
Bit32s bExp = extractFloatx80Exp(b);
if (aClass == float_denormal)
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
if (bClass == float_denormal)
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
if (aExp == bExp && aSig == bSig)
return float_relation_equal;
int less_than =
aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
: ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
if (less_than) return float_relation_less;
return float_relation_greater;
}
int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status)
{
return floatx80_compare(a, b, 0, status);
}
int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status)
{
return floatx80_compare(a, b, 1, status);
}
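/*----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs sources; the
| *_example helper name is hypothetical and the status word is assumed to be
| supplied by the caller. The two wrappers above differ only in how quiet
| NaNs are treated: both report unordered against Const_QNaN, but only the
| signalling variant (quiet == 0, i.e. floatx80_compare_two) raises the
| invalid exception for a quiet NaN operand.
*----------------------------------------------------------------------------*/
static void floatx80_compare_example(struct float_status_t *status)
{
int rc_lt = floatx80_compare_two(Const_1, Const_L2T, status); /* float_relation_less */
int rc_nan = floatx80_compare_quiet(Const_QNaN, Const_1, status); /* float_relation_unordered, no invalid */
(void) rc_lt;
(void) rc_nan;
}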

View File

@@ -0,0 +1,113 @@
/*============================================================================
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
floating point emulation.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
* Written for Bochs (x86 architecture simulator) by
* Stanislav Shwartsman [sshwarts at sourceforge net]
* ==========================================================================*/
#ifndef _SOFTFLOATX80_EXTENSIONS_H_
#define _SOFTFLOATX80_EXTENSIONS_H_
#include "softfloat.h"
#include "softfloat-specialize.h"
/*----------------------------------------------------------------------------
| Software IEC/IEEE integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
#ifdef __cplusplus
extern "C" {
#endif
Bit16s floatx80_to_int16(floatx80, struct float_status_t *status);
Bit16s floatx80_to_int16_round_to_zero(floatx80, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
*----------------------------------------------------------------------------*/
floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status);
floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status);
int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status);
int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status);
floatx80 f2xm1(floatx80 a, struct float_status_t *status);
floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status);
floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status);
floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision trigonometric functions.
*----------------------------------------------------------------------------*/
int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, struct float_status_t *status);
int fsin(floatx80 *a, struct float_status_t *status);
int fcos(floatx80 *a, struct float_status_t *status);
int ftan(floatx80 *a, struct float_status_t *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision compare.
*----------------------------------------------------------------------------*/
int floatx80_compare(floatx80, floatx80, int quiet, struct float_status_t *status);
int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status);
int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status);
#ifdef __cplusplus
}
#endif
/*-----------------------------------------------------------------------------
| Calculates the absolute value of the extended double-precision floating-point
| value `a'. The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 floatx80_abs(floatx80 reg)
{
reg.exp &= 0x7FFF;
return reg;
}
/*-----------------------------------------------------------------------------
| Changes the sign of the extended double-precision floating-point value 'a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 floatx80_chs(floatx80 reg)
{
reg.exp ^= 0x8000;
return reg;
}
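/*-----------------------------------------------------------------------------
| Editor's illustrative sketch, not part of the original Bochs header; the
| *_example helper name is hypothetical. Both helpers above only touch the
| sign bit kept in the top bit of `exp', so they compose freely: forcing a
| value negative is simply chs(abs(x)).
*----------------------------------------------------------------------------*/
BX_CPP_INLINE floatx80 floatx80_force_negative_example(floatx80 reg)
{
return floatx80_chs(floatx80_abs(reg)); /* -|reg| */
}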
/*-----------------------------------------------------------------------------
| Commonly used extended double-precision floating-point constants.
*----------------------------------------------------------------------------*/
extern const floatx80 Const_Z;
extern const floatx80 Const_1;
extern const floatx80 Const_L2T;
extern const floatx80 Const_L2E;
extern const floatx80 Const_PI;
extern const floatx80 Const_LG2;
extern const floatx80 Const_LN2;
extern const floatx80 Const_INF;
#endif

View File

@@ -0,0 +1,750 @@
#define sf_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \
static int sf_FADD##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) \
result = floatx80_add(a, use_var, &status); \
\
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FDIV##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) { \
result = floatx80_div(a, use_var, &status); \
} \
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FDIVR##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) { \
result = floatx80_div(use_var, a, &status); \
} \
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FMUL##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) { \
result = floatx80_mul(a, use_var, &status); \
} \
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul##cycle_postfix) : ((x87_timings.fmul##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul##cycle_postfix) : ((x87_concurrency.fmul##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FSUB##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) \
result = floatx80_sub(a, use_var, &status); \
\
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FSUBR##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a, result; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
FPU_check_pending_exceptions(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_stack_underflow(fetchdat, 0, 0); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (!is_nan) \
result = floatx80_sub(use_var, a, &status); \
\
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_save_regi(result, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
return 0; \
}
// clang-format off
sf_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32)
#ifndef FPU_8087
sf_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32)
#endif
sf_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64)
#ifndef FPU_8087
sf_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64)
#endif
sf_FPU(iw, uint16_t, 16, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16)
#ifndef FPU_8087
sf_FPU(iw, uint16_t, 32, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16)
#endif
sf_FPU(il, uint32_t, 16, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32)
#ifndef FPU_8087
sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32)
#endif
// clang-format on
static int
sf_FADD_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_add(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FADD_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_add(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, fetchdat & 7);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FADDP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_add(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FDIV_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FDIV_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, fetchdat & 7);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FDIVP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FDIVR_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FDIVR_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, fetchdat & 7);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FDIVRP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_div(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi));
return 0;
}
static int
sf_FMUL_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_mul(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
return 0;
}
static int
sf_FMUL_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_mul(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
return 0;
}
static int
sf_FMULP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_mul(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi));
return 0;
}
static int
sf_FSUB_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSUB_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSUBP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSUBR_st0_stj(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(fetchdat & 7);
b = FPU_read_regi(0);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSUBR_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSUBRP_sti_st0(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_stack_underflow(fetchdat, fetchdat & 7, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
result = floatx80_sub(a, b, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, fetchdat & 7);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi));
return 0;
}
static int
sf_FSQRT(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
result = floatx80_sqrt(FPU_read_regi(0), &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsqrt) : (x87_timings.fsqrt * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsqrt) : (x87_concurrency.fsqrt * cpu_multi));
return 0;
}
static int
sf_FRNDINT(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
result = floatx80_round_to_int(FPU_read_regi(0), &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frndint) : (x87_timings.frndint * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frndint) : (x87_concurrency.frndint * cpu_multi));
return 0;
}

View File

@@ -0,0 +1,489 @@
#define cmp_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \
static int sf_FCOM##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a; \
int rc; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \
setcc(C0 | C2 | C3); \
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (is_nan) { \
rc = float_relation_unordered; \
float_raise(&status, float_flag_invalid); \
} else { \
rc = floatx80_compare_two(a, use_var, &status); \
} \
setcc(FPU_status_word_flags_fpu_compare(rc)); \
FPU_exception(fetchdat, status.float_exception_flags, 0); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \
return 0; \
} \
static int sf_FCOMP##name##_a##a_size(uint32_t fetchdat) \
{ \
floatx80 a; \
int rc; \
struct float_status_t status; \
optype temp; \
FP_ENTER(); \
fetch_ea_##a_size(fetchdat); \
SEG_CHECK_READ(cpu_state.ea_seg); \
load_var = rw; \
if (cpu_state.abrt) \
return 1;\
clear_C1(); \
if (IS_TAG_EMPTY(0)) { \
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \
setcc(C0 | C2 | C3); \
if (is_IA_masked()) \
FPU_pop(); \
\
goto next_ins; \
} \
status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
a = FPU_read_regi(0); \
if (is_nan) { \
rc = float_relation_unordered; \
float_raise(&status, float_flag_invalid); \
} else { \
rc = floatx80_compare_two(a, use_var, &status); \
} \
setcc(FPU_status_word_flags_fpu_compare(rc)); \
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
FPU_pop(); \
\
next_ins: \
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \
return 0; \
}
// clang-format off
cmp_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32)
#ifndef FPU_8087
cmp_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32)
#endif
cmp_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64)
#ifndef FPU_8087
cmp_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64)
#endif
cmp_FPU(iw, int16_t, 16, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16)
#ifndef FPU_8087
cmp_FPU(iw, int16_t, 32, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16)
#endif
cmp_FPU(il, int32_t, 16, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32)
#ifndef FPU_8087
cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32)
#endif
// clang-format on
static int
sf_FCOM_sti(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_two(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
FPU_exception(fetchdat, status.float_exception_flags, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
return 0;
}
static int
sf_FCOMP_sti(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
if (is_IA_masked()) {
FPU_pop();
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_two(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
return 0;
}
static int
sf_FCOMPP(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
if (is_IA_masked()) {
FPU_pop();
FPU_pop();
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(1);
rc = floatx80_compare_two(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
return 0;
}
#ifndef FPU_8087
static int
sf_FUCOMPP(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
if (is_IA_masked()) {
FPU_pop();
FPU_pop();
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(1);
rc = floatx80_compare_quiet(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
return 0;
}
static int
sf_FCOMI_st0_stj(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
flags_rebuild();
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_two(a, b, &status);
FPU_write_eflags_fpu_compare(rc);
FPU_exception(fetchdat, status.float_exception_flags, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
return 0;
}
static int
sf_FCOMIP_st0_stj(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
flags_rebuild();
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
if (is_IA_masked()) {
FPU_pop();
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_two(a, b, &status);
FPU_write_eflags_fpu_compare(rc);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi));
return 0;
}
static int
sf_FUCOM_sti(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_quiet(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
FPU_exception(fetchdat, status.float_exception_flags, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
return 0;
}
static int
sf_FUCOMP_sti(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
if (is_IA_masked())
FPU_pop();
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_quiet(a, b, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_pop();
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
return 0;
}
static int
sf_FUCOMI_st0_stj(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
flags_rebuild();
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_quiet(a, b, &status);
FPU_write_eflags_fpu_compare(rc);
FPU_exception(fetchdat, status.float_exception_flags, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
return 0;
}
static int
sf_FUCOMIP_st0_stj(uint32_t fetchdat)
{
floatx80 a, b;
struct float_status_t status;
int rc;
FP_ENTER();
cpu_state.pc++;
flags_rebuild();
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG);
if (is_IA_masked())
FPU_pop();
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(fetchdat & 7);
rc = floatx80_compare_quiet(a, b, &status);
FPU_write_eflags_fpu_compare(rc);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_pop();
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi));
return 0;
}
#endif
static int
sf_FTST(uint32_t fetchdat)
{
int rc;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
setcc(C0 | C2 | C3);
} else {
status = i387cw_to_softfloat_status_word(i387_get_control_word());
rc = floatx80_compare_two(FPU_read_regi(0), Const_Z, &status);
setcc(FPU_status_word_flags_fpu_compare(rc));
FPU_exception(fetchdat, status.float_exception_flags, 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ftst) : (x87_timings.ftst * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ftst) : (x87_concurrency.ftst * cpu_multi));
return 0;
}
static int
sf_FXAM(uint32_t fetchdat)
{
floatx80 reg;
int sign;
float_class_t aClass;
FP_ENTER();
cpu_state.pc++;
reg = FPU_read_regi(0);
sign = floatx80_sign(reg);
/*
* Examines the contents of the ST(0) register and sets the condition
* code flags C0, C2 and C3 in the FPU status word to indicate the
* class of value or number in the register.
*/
if (IS_TAG_EMPTY(0)) {
setcc(C3 | C1 | C0);
} else {
aClass = floatx80_class(reg);
switch (aClass) {
case float_zero:
setcc(C3 | C1);
break;
case float_SNaN:
case float_QNaN:
// unsupported handled as NaNs
if (floatx80_is_unsupported(reg)) {
setcc(C1);
} else {
setcc(C1 | C0);
}
break;
case float_negative_inf:
case float_positive_inf:
setcc(C2 | C1 | C0);
break;
case float_denormal:
setcc(C3 | C2 | C1);
break;
case float_normalized:
setcc(C2 | C1);
break;
}
}
/*
* The C1 flag is set to the sign of the value in ST(0), regardless
* of whether the register is empty or full.
*/
if (!sign)
clear_C1();
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxam) : (x87_timings.fxam * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxam) : (x87_concurrency.fxam * cpu_multi));
return 0;
}

View File

@@ -0,0 +1,131 @@
/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
(and not one of RC_RND or RC_UP).
*/
#define DOWN_OR_CHOP() (fpu_state.cwd & FPU_CW_RC & FPU_RC_DOWN)
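/* Editor's note (assuming the usual x87 control-word layout, where the RC
field occupies bits 11:10 so that FPU_CW_RC == 0x0C00 and
FPU_RC_DOWN == 0x0400): masking with both constants keeps only bit 10,
which is set exactly for the round-down (01) and chop (11) modes and clear
for round-to-nearest (00) and round-up (10). */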
static __inline floatx80
FPU_round_const(const floatx80 a, int adj)
{
floatx80 result = a;
result.fraction += adj;
return result;
}
static int
sf_FLDL2T(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(FPU_round_const(Const_L2T, (fpu_state.cwd & FPU_CW_RC) == X87_ROUNDING_UP), 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
return 0;
}
static int
sf_FLDL2E(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(FPU_round_const(Const_L2E, DOWN_OR_CHOP() ? -1 : 0), 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
return 0;
}
static int
sf_FLDPI(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(FPU_round_const(Const_PI, DOWN_OR_CHOP() ? -1 : 0), 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
return 0;
}
static int
sf_FLDEG2(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(FPU_round_const(Const_LG2, DOWN_OR_CHOP() ? -1 : 0), 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
return 0;
}
static int
sf_FLDLN2(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(FPU_round_const(Const_LN2, DOWN_OR_CHOP() ? -1 : 0), 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi));
return 0;
}
static int
sf_FLD1(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(Const_1, 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi));
return 0;
}
static int
sf_FLDZ(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (!IS_TAG_EMPTY(-1))
FPU_stack_overflow(fetchdat);
else {
FPU_push();
FPU_save_regi(Const_Z, 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi));
return 0;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,134 @@
static int
sf_FXCH_sti(uint32_t fetchdat)
{
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
floatx80 st0_reg, sti_reg;
int st0_tag, sti_tag;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
st0_tag = FPU_gettagi(0);
sti_tag = FPU_gettagi(fetchdat & 7);
st0_reg = FPU_read_regi(0);
sti_reg = FPU_read_regi(fetchdat & 7);
clear_C1();
if ((st0_tag == X87_TAG_EMPTY) || (sti_tag == X87_TAG_EMPTY)) {
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
if (is_IA_masked()) {
/* Masked response */
if (st0_tag == X87_TAG_EMPTY)
st0_reg = floatx80_default_nan;
if (sti_tag == X87_TAG_EMPTY)
sti_reg = floatx80_default_nan;
} else
goto next_ins;
}
FPU_save_regi(st0_reg, fetchdat & 7);
FPU_save_regi(sti_reg, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxch) : (x87_timings.fxch * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxch) : (x87_concurrency.fxch * cpu_multi));
return 0;
}
static int
sf_FCHS(uint32_t fetchdat)
{
floatx80 st0_reg;
floatx80 result;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
if (IS_TAG_EMPTY(0))
FPU_stack_underflow(fetchdat, 0, 0);
else {
clear_C1();
st0_reg = FPU_read_regi(0);
result = floatx80_chs(st0_reg);
FPU_save_regi(result, 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fchs) : (x87_timings.fchs * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fchs) : (x87_concurrency.fchs * cpu_multi));
return 0;
}
static int
sf_FABS(uint32_t fetchdat)
{
floatx80 st0_reg;
floatx80 result;
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
if (IS_TAG_EMPTY(0))
FPU_stack_underflow(fetchdat, 0, 0);
else {
clear_C1();
st0_reg = FPU_read_regi(0);
result = floatx80_abs(st0_reg);
FPU_save_regi(result, 0);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fabs) : (x87_timings.fabs * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fabs) : (x87_concurrency.fabs * cpu_multi));
return 0;
}
static int
sf_FDECSTP(uint32_t fetchdat)
{
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
fpu_state.tos = (fpu_state.tos - 1) & 7;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi));
return 0;
}
static int
sf_FINCSTP(uint32_t fetchdat)
{
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
fpu_state.tos = (fpu_state.tos + 1) & 7;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi));
return 0;
}
static int
sf_FFREE_sti(uint32_t fetchdat)
{
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
FPU_settagi(X87_TAG_EMPTY, fetchdat & 7);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi));
return 0;
}
static int
sf_FFREEP_sti(uint32_t fetchdat)
{
FP_ENTER();
FPU_check_pending_exceptions();
cpu_state.pc++;
clear_C1();
FPU_settagi(X87_TAG_EMPTY, fetchdat & 7);
if (cpu_state.abrt)
return 1;
FPU_pop();
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi));
return 0;
}


@@ -0,0 +1,593 @@
static uint32_t
fpu_save_environment(void)
{
int tag;
unsigned offset;
/* read all registers in stack order and update x87 tag word */
for (int n = 0; n < 8; n++) {
// update tag only if it is not empty
if (!IS_TAG_EMPTY(n)) {
tag = FPU_tagof(FPU_read_regi(n));
FPU_settagi(tag, n);
}
}
fpu_state.swd = (fpu_state.swd & ~(7 << 11)) | ((fpu_state.tos & 7) << 11);
switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) {
case 0x000: { /*16-bit real mode*/
uint16_t tmp;
uint32_t fp_ip, fp_dp;
fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip;
fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp;
tmp = i387_get_control_word();
writememw(easeg, cpu_state.eaaddr + 0x00, tmp);
tmp = i387_get_status_word();
writememw(easeg, cpu_state.eaaddr + 0x02, tmp);
tmp = fpu_state.tag;
writememw(easeg, cpu_state.eaaddr + 0x04, tmp);
tmp = fp_ip & 0xffff;
writememw(easeg, cpu_state.eaaddr + 0x06, tmp);
tmp = (uint16_t)((fp_ip & 0xf0000) >> 4) | fpu_state.foo;
writememw(easeg, cpu_state.eaaddr + 0x08, tmp);
tmp = fp_dp & 0xffff;
writememw(easeg, cpu_state.eaaddr + 0x0a, tmp);
tmp = (uint16_t)((fp_dp & 0xf0000) >> 4);
writememw(easeg, cpu_state.eaaddr + 0x0c, tmp);
offset = 0x0e;
}
break;
        case 0x001: { /*16-bit protected mode*/
uint16_t tmp;
tmp = i387_get_control_word();
writememw(easeg, cpu_state.eaaddr + 0x00, tmp);
tmp = i387_get_status_word();
writememw(easeg, cpu_state.eaaddr + 0x02, tmp);
tmp = fpu_state.tag;
writememw(easeg, cpu_state.eaaddr + 0x04, tmp);
tmp = (uint16_t)(fpu_state.fip) & 0xffff;
writememw(easeg, cpu_state.eaaddr + 0x06, tmp);
tmp = fpu_state.fcs;
writememw(easeg, cpu_state.eaaddr + 0x08, tmp);
tmp = (uint16_t)(fpu_state.fdp) & 0xffff;
writememw(easeg, cpu_state.eaaddr + 0x0a, tmp);
tmp = fpu_state.fds;
writememw(easeg, cpu_state.eaaddr + 0x0c, tmp);
offset = 0x0e;
}
break;
case 0x100: { /*32-bit real mode*/
uint32_t tmp, fp_ip, fp_dp;
fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip;
fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp;
tmp = 0xffff0000 | i387_get_control_word();
writememl(easeg, cpu_state.eaaddr + 0x00, tmp);
tmp = 0xffff0000 | i387_get_status_word();
writememl(easeg, cpu_state.eaaddr + 0x04, tmp);
tmp = 0xffff0000 | fpu_state.tag;
writememl(easeg, cpu_state.eaaddr + 0x08, tmp);
tmp = 0xffff0000 | (fp_ip & 0xffff);
writememl(easeg, cpu_state.eaaddr + 0x0c, tmp);
tmp = ((fp_ip & 0xffff0000) >> 4) | fpu_state.foo;
writememl(easeg, cpu_state.eaaddr + 0x10, tmp);
tmp = 0xffff0000 | (fp_dp & 0xffff);
writememl(easeg, cpu_state.eaaddr + 0x14, tmp);
tmp = (fp_dp & 0xffff0000) >> 4;
writememl(easeg, cpu_state.eaaddr + 0x18, tmp);
offset = 0x1c;
}
break;
case 0x101: { /*32-bit protected mode*/
uint32_t tmp;
tmp = 0xffff0000 | i387_get_control_word();
writememl(easeg, cpu_state.eaaddr + 0x00, tmp);
tmp = 0xffff0000 | i387_get_status_word();
writememl(easeg, cpu_state.eaaddr + 0x04, tmp);
tmp = 0xffff0000 | fpu_state.tag;
writememl(easeg, cpu_state.eaaddr + 0x08, tmp);
tmp = (uint32_t)(fpu_state.fip);
writememl(easeg, cpu_state.eaaddr + 0x0c, tmp);
tmp = fpu_state.fcs | (((uint32_t)(fpu_state.foo)) << 16);
writememl(easeg, cpu_state.eaaddr + 0x10, tmp);
tmp = (uint32_t)(fpu_state.fdp);
writememl(easeg, cpu_state.eaaddr + 0x14, tmp);
tmp = 0xffff0000 | fpu_state.fds;
writememl(easeg, cpu_state.eaaddr + 0x18, tmp);
offset = 0x1c;
}
break;
}
return (cpu_state.eaaddr + offset);
}
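
For the 16-bit real-mode image above, the 20-bit linear instruction and data pointers are each split across two words: the low 16 bits go to offsets 0x06/0x0a and the top nibble lands in bits 12..15 of the next word, which for the instruction pointer also carries the 11-bit opcode. A minimal host-side sketch of that packing and of the matching unpacking done by fpu_load_environment(); the values are arbitrary examples.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint16_t fcs = 0x1234; /* last instruction's CS */
    uint16_t fip = 0x5678; /* last instruction's IP */
    uint16_t foo = 0x2d9;  /* 11-bit opcode field   */

    uint32_t fp_ip  = ((uint32_t) (fcs << 4)) | fip;             /* 20-bit pointer  */
    uint16_t word06 = fp_ip & 0xffff;                            /* stored at +0x06 */
    uint16_t word08 = (uint16_t) ((fp_ip & 0xf0000) >> 4) | foo; /* stored at +0x08 */

    /* Reload: the top nibble comes back from bits 12..15 of the +0x08 word. */
    uint32_t reloaded = ((uint32_t) (word08 & 0xf000) << 4) | word06;

    printf("fp_ip=%05x reloaded=%05x opcode=%03x\n",
           (unsigned) fp_ip, (unsigned) reloaded, word08 & 0x7ff);
    return 0;
}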
static uint32_t
fpu_load_environment(void)
{
unsigned offset;
switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) {
case 0x000: { /*16-bit real mode*/
uint16_t tmp;
uint32_t fp_ip, fp_dp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c);
fp_dp = (tmp & 0xf000) << 4;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a);
fpu_state.fdp = fp_dp | tmp;
fpu_state.fds = 0;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x08);
fp_ip = (tmp & 0xf000) << 4;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x06);
fpu_state.fip = fp_ip | tmp;
fpu_state.fcs = 0;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x04);
fpu_state.tag = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x02);
fpu_state.swd = tmp;
fpu_state.tos = (tmp >> 11) & 7;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x00);
fpu_state.cwd = tmp;
offset = 0x0e;
}
break;
        case 0x001: { /*16-bit protected mode*/
uint16_t tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c);
fpu_state.fds = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a);
fpu_state.fdp = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x08);
fpu_state.fcs = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x06);
fpu_state.fip = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x04);
fpu_state.tag = tmp;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x02);
fpu_state.swd = tmp;
fpu_state.tos = (tmp >> 11) & 7;
tmp = readmemw(easeg, cpu_state.eaaddr + 0x00);
fpu_state.cwd = tmp;
offset = 0x0e;
}
break;
case 0x100: { /*32-bit real mode*/
uint32_t tmp, fp_ip, fp_dp;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x18);
fp_dp = (tmp & 0x0ffff000) << 4;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x14);
fp_dp |= (tmp & 0xffff);
fpu_state.fdp = fp_dp;
fpu_state.fds = 0;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x10);
fpu_state.foo = tmp & 0x07ff;
fp_ip = (tmp & 0x0ffff000) << 4;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c);
fp_ip |= (tmp & 0xffff);
fpu_state.fip = fp_ip;
fpu_state.fcs = 0;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x08);
fpu_state.tag = tmp & 0xffff;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x04);
fpu_state.swd = tmp & 0xffff;
fpu_state.tos = (tmp >> 11) & 7;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x00);
fpu_state.cwd = tmp & 0xffff;
offset = 0x1c;
}
break;
case 0x101: { /*32-bit protected mode*/
uint32_t tmp;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x18);
fpu_state.fds = tmp & 0xffff;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x14);
fpu_state.fdp = tmp;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x10);
fpu_state.fcs = tmp & 0xffff;
fpu_state.foo = (tmp >> 16) & 0x07ff;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c);
fpu_state.fip = tmp;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x08);
fpu_state.tag = tmp & 0xffff;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x04);
fpu_state.swd = tmp & 0xffff;
fpu_state.tos = (tmp >> 11) & 7;
tmp = readmeml(easeg, cpu_state.eaaddr + 0x00);
fpu_state.cwd = tmp & 0xffff;
offset = 0x1c;
}
break;
}
    /* bit 6 is reserved, always set to '1' */
fpu_state.cwd = (fpu_state.cwd & ~FPU_CW_Reserved_Bits) | 0x0040;
/* check for unmasked exceptions */
if (fpu_state.swd & ~fpu_state.cwd & FPU_CW_Exceptions_Mask) {
/* set the B and ES bits in the status-word */
fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
} else {
/* clear the B and ES bits in the status-word */
fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
}
return (cpu_state.eaaddr + offset);
}
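
Both the environment load above and FLDCW below finish with the same rule: the ES (summary) and B (busy) status bits are set exactly when some exception flag is raised whose mask bit in the control word is clear. A small sketch of that rule; the bit values follow the i387 layout, but the macro names here are illustrative, not the emulator's.

#include <stdint.h>
#include <stdio.h>

#define EXCEPTION_BITS 0x003f /* IE, DE, ZE, OE, UE, PE / their mask bits */
#define SW_SUMMARY     0x0080 /* ES */
#define SW_BUSY        0x8000 /* B  */

static uint16_t
update_summary(uint16_t swd, uint16_t cwd)
{
    if (swd & ~cwd & EXCEPTION_BITS)
        return swd | (SW_SUMMARY | SW_BUSY); /* some raised exception is unmasked */
    return swd & (uint16_t) ~(SW_SUMMARY | SW_BUSY);
}

int
main(void)
{
    printf("%04x\n", update_summary(0x0004, 0x037f)); /* ZE raised, masked   -> 0004 */
    printf("%04x\n", update_summary(0x0004, 0x037b)); /* ZE raised, unmasked -> 8084 */
    return 0;
}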
static int
sf_FLDCW_a16(uint32_t fetchdat)
{
uint16_t tempw;
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
tempw = geteaw();
if (cpu_state.abrt)
return 1;
    fpu_state.cwd = (tempw & ~FPU_CW_Reserved_Bits) | 0x0040; // bit 6 is reserved, always '1'
/* check for unmasked exceptions */
if (fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) {
/* set the B and ES bits in the status-word */
fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
} else {
/* clear the B and ES bits in the status-word */
fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi));
return 0;
}
#ifndef FPU_8087
static int
sf_FLDCW_a32(uint32_t fetchdat)
{
uint16_t tempw;
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
tempw = geteaw();
if (cpu_state.abrt)
return 1;
    fpu_state.cwd = (tempw & ~FPU_CW_Reserved_Bits) | 0x0040; // bit 6 is reserved, always '1'
/* check for unmasked exceptions */
if (fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) {
/* set the B and ES bits in the status-word */
fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
} else {
/* clear the B and ES bits in the status-word */
fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi));
return 0;
}
#endif
static int
sf_FNSTCW_a16(uint32_t fetchdat)
{
uint16_t cwd = i387_get_control_word();
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
seteaw(cwd);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FNSTCW_a32(uint32_t fetchdat)
{
uint16_t cwd = i387_get_control_word();
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
seteaw(cwd);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
return cpu_state.abrt;
}
#endif
static int
sf_FNSTSW_a16(uint32_t fetchdat)
{
uint16_t swd = i387_get_status_word();
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
seteaw(swd);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FNSTSW_a32(uint32_t fetchdat)
{
uint16_t swd = i387_get_status_word();
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
seteaw(swd);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
return cpu_state.abrt;
}
#endif
#ifdef FPU_8087
static int
sf_FI(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
    /* FNENI (DB E0) clears and FNDISI (DB E1) sets the 8087 interrupt enable
       mask, which occupies bit 7 of the control word (the same bit position
       as FPU_SW_Summary). */
    fpu_state.cwd &= ~FPU_SW_Summary;
    if (rmdat == 0xe1)
        fpu_state.cwd |= FPU_SW_Summary;
wait(3, 0);
return 0;
}
#else
static int
sf_FNSTSW_AX(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
AX = i387_get_status_word();
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi));
return 0;
}
#endif
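
FNSTSW AX is the usual way for software to inspect the condition codes after a compare or FPREM, typically followed by SAHF and a conditional jump. The sketch below just decodes the C0..C3 and TOS fields from a raw status-word value; the bit positions follow the i387 layout.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint16_t swd = 0x4500; /* C3=C2=C0=1: the pattern FCOM reports for unordered operands */

    printf("C0=%d C1=%d C2=%d C3=%d TOS=%d\n",
           (swd >> 8) & 1, (swd >> 9) & 1, (swd >> 10) & 1,
           (swd >> 14) & 1, (swd >> 11) & 7);
    return 0;
}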
static int
sf_FRSTOR_a16(uint32_t fetchdat)
{
floatx80 tmp;
int offset;
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
offset = fpu_load_environment();
for (int n = 0; n < 8; n++) {
tmp.fraction = readmemq(easeg, offset + (n * 10));
tmp.exp = readmemw(easeg, offset + (n * 10) + 8);
FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FRSTOR_a32(uint32_t fetchdat)
{
floatx80 tmp;
int offset;
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
offset = fpu_load_environment();
for (int n = 0; n < 8; n++) {
tmp.fraction = readmemq(easeg, offset + (n * 10));
tmp.exp = readmemw(easeg, offset + (n * 10) + 8);
FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n);
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi));
return cpu_state.abrt;
}
#endif
static int
sf_FNSAVE_a16(uint32_t fetchdat)
{
floatx80 stn;
int offset;
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
offset = fpu_save_environment();
/* save all registers in stack order. */
for (int m = 0; m < 8; m++) {
stn = FPU_read_regi(m);
writememq(easeg, offset + (m * 10), stn.fraction);
writememw(easeg, offset + (m * 10) + 8, stn.exp);
}
#ifdef FPU_8087
    fpu_state.cwd = 0x3FF;
#else
fpu_state.cwd = 0x37F;
#endif
fpu_state.swd = 0;
fpu_state.tos = 0;
fpu_state.tag = 0xffff;
cpu_state.ismmx = 0;
fpu_state.foo = 0;
fpu_state.fds = 0;
fpu_state.fdp = 0;
fpu_state.fcs = 0;
fpu_state.fip = 0;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FNSAVE_a32(uint32_t fetchdat)
{
floatx80 stn;
int offset;
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
offset = fpu_save_environment();
/* save all registers in stack order. */
for (int m = 0; m < 8; m++) {
stn = FPU_read_regi(m);
writememq(easeg, offset + (m * 10), stn.fraction);
writememw(easeg, offset + (m * 10) + 8, stn.exp);
}
#ifdef FPU_8087
    fpu_state.cwd = 0x3FF;
#else
fpu_state.cwd = 0x37F;
#endif
fpu_state.swd = 0;
fpu_state.tos = 0;
fpu_state.tag = 0xffff;
cpu_state.ismmx = 0;
fpu_state.foo = 0;
fpu_state.fds = 0;
fpu_state.fdp = 0;
fpu_state.fcs = 0;
fpu_state.fip = 0;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi));
return cpu_state.abrt;
}
#endif
static int
sf_FNCLEX(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary | FPU_SW_Stack_Fault | FPU_SW_Precision |
FPU_SW_Underflow | FPU_SW_Overflow | FPU_SW_Zero_Div | FPU_SW_Denormal_Op |
FPU_SW_Invalid);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi));
return 0;
}
static int
sf_FNINIT(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
#ifdef FPU_8087
fpu_state.cwd = 0x3FF;
#else
fpu_state.cwd = 0x37F;
#endif
fpu_state.swd = 0;
fpu_state.tos = 0;
fpu_state.tag = 0xffff;
fpu_state.foo = 0;
fpu_state.fds = 0;
fpu_state.fdp = 0;
fpu_state.fcs = 0;
fpu_state.fip = 0;
cpu_state.ismmx = 0;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.finit) : (x87_timings.finit * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.finit) : (x87_concurrency.finit * cpu_multi));
CPU_BLOCK_END();
return 0;
}
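
The 0x37F control word installed by FNINIT above (0x3FF on the 8087) masks every exception and selects extended precision with round-to-nearest. A quick host-side decode of those fields; positions follow the i387 layout.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint16_t cwd = 0x037f;

    printf("exception masks : 0x%02x\n", cwd & 0x3f);  /* 0x3f: all six masked   */
    printf("reserved bit 6  : %d\n", (cwd >> 6) & 1);  /* always reads back as 1 */
    printf("precision ctl   : %d\n", (cwd >> 8) & 3);  /* 3 = 64-bit significand */
    printf("rounding ctl    : %d\n", (cwd >> 10) & 3); /* 0 = round to nearest   */
    return 0;
}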
static int
sf_FLDENV_a16(uint32_t fetchdat)
{
int tag;
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
fpu_load_environment();
/* read all registers in stack order and update x87 tag word */
for (int n = 0; n < 8; n++) {
// update tag only if it is not empty
if (!IS_TAG_EMPTY(n)) {
tag = FPU_tagof(FPU_read_regi(n));
FPU_settagi(tag, n);
}
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FLDENV_a32(uint32_t fetchdat)
{
int tag;
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_READ(cpu_state.ea_seg);
fpu_load_environment();
/* read all registers in stack order and update x87 tag word */
for (int n = 0; n < 8; n++) {
// update tag only if it is not empty
if (!IS_TAG_EMPTY(n)) {
tag = FPU_tagof(FPU_read_regi(n));
FPU_settagi(tag, n);
}
}
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi));
return cpu_state.abrt;
}
#endif
static int
sf_FNSTENV_a16(uint32_t fetchdat)
{
FP_ENTER();
fetch_ea_16(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
fpu_save_environment();
/* mask all floating point exceptions */
fpu_state.cwd |= FPU_CW_Exceptions_Mask;
/* clear the B and ES bits in the status word */
fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi));
return cpu_state.abrt;
}
#ifndef FPU_8087
static int
sf_FNSTENV_a32(uint32_t fetchdat)
{
FP_ENTER();
fetch_ea_32(fetchdat);
SEG_CHECK_WRITE(cpu_state.ea_seg);
fpu_save_environment();
/* mask all floating point exceptions */
fpu_state.cwd |= FPU_CW_Exceptions_Mask;
/* clear the B and ES bits in the status word */
fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary);
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi));
return cpu_state.abrt;
}
#endif
static int
sf_FNOP(uint32_t fetchdat)
{
FP_ENTER();
cpu_state.pc++;
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi));
return 0;
}


@@ -0,0 +1,418 @@
static int
sf_F2XM1(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
result = f2xm1(FPU_read_regi(0), &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.f2xm1) : (x87_timings.f2xm1 * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.f2xm1) : (x87_concurrency.f2xm1 * cpu_multi));
return 0;
}
static int
sf_FYL2X(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 1, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
result = fyl2x(FPU_read_regi(0), FPU_read_regi(1), &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2x) : (x87_timings.fyl2x * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fyl2x) : (x87_concurrency.fyl2x * cpu_multi));
return 0;
}
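
F2XM1 and FYL2X are normally used as a pair to build pow(): x^y = 2^(y * log2(x)), with F2XM1 handling only the fractional part of the product (its domain is |arg| <= 1) and FSCALE applying the integer part. The sketch below only checks that identity with the host libm; it is not the softfloat path used above.

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double x = 3.0;
    double y = 2.5;

    double via_pow      = pow(x, y);
    double via_identity = exp2(y * log2(x)); /* 2^(y * log2(x)) */

    printf("pow      = %.17g\n", via_pow);
    printf("identity = %.17g\n", via_identity);
    return 0;
}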
static int
sf_FPTAN(uint32_t fetchdat)
{
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
floatx80 y;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) {
if (IS_TAG_EMPTY(0))
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
else
FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
/* The masked response */
if (is_IA_masked()) {
FPU_save_regi(floatx80_default_nan, 0);
FPU_push();
FPU_save_regi(floatx80_default_nan, 0);
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
y = FPU_read_regi(0);
if (ftan(&y, &status) == -1) {
fpu_state.swd |= C2;
goto next_ins;
}
if (floatx80_is_nan(y)) {
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(y, 0);
FPU_push();
FPU_save_regi(y, 0);
}
goto next_ins;
}
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(y, 0);
FPU_push();
FPU_save_regi(Const_1, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fptan) : (x87_timings.fptan * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fptan) : (x87_concurrency.fptan * cpu_multi));
return 0;
}
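
sf_FPTAN stores the partial tangent and then pushes 1.0, so architecturally ST(0) = 1.0 and ST(1) = tan(x); the extra 1.0 keeps compatibility with the 8087, which returned the tangent as a ratio of two stack entries. A host-side view of the resulting values, using the plain libm and purely for illustration.

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double x   = 0.5;
    double st1 = tan(x); /* partial tangent */
    double st0 = 1.0;    /* pushed constant */

    printf("ST(0)=%g ST(1)=%.17g cot(x)=%.17g\n", st0, st1, st0 / st1);
    return 0;
}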
static int
sf_FPATAN(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 1, 1);
goto next_ins;
}
a = FPU_read_regi(0);
b = FPU_read_regi(1);
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
result = fpatan(a, b, &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_pop();
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fpatan) : (x87_timings.fpatan * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fpatan) : (x87_concurrency.fpatan * cpu_multi));
return 0;
}
static int
sf_FXTRACT(uint32_t fetchdat)
{
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
struct float_status_t status;
floatx80 a, b;
FP_ENTER();
cpu_state.pc++;
clear_C1();
#if 0 //TODO
if ((IS_TAG_EMPTY(0) || IS_TAG_EMPTY(-1))) {
if (IS_TAG_EMPTY(0))
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
else
FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
/* The masked response */
if (is_IA_masked()) {
FPU_save_regi(floatx80_default_nan, 0);
FPU_push();
FPU_save_regi(floatx80_default_nan, 0);
}
goto next_ins;
}
#endif
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = floatx80_extract(&a, &status);
if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(b, 0); // exponent
FPU_push();
FPU_save_regi(a, 0); // fraction
}
#if 0 //TODO.
next_ins:
#endif
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi));
return 0;
}
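
FXTRACT, as handled above, leaves the unbiased exponent in ST(1) and a significand in [1,2) in ST(0). The closest host equivalent is frexp(), whose mantissa lies in [0.5,1), so it needs a one-step adjustment; a small sketch:

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double x = 1234.5;
    int    e;

    double m   = frexp(x, &e); /* x = m * 2^e, 0.5 <= m < 1 */
    double sig = m * 2.0;      /* ST(0) after FXTRACT       */
    int    ex  = e - 1;        /* ST(1) after FXTRACT       */

    printf("%g = %.17g * 2^%d\n", x, sig, ex);
    return 0;
}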
static int
sf_FPREM1(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
uint64_t quotient = 0;
int flags, cc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(1);
flags = floatx80_ieee754_remainder(a, b, &result, &quotient, &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
if (flags >= 0) {
cc = 0;
if (flags)
cc = C2;
else {
if (quotient & 1)
cc |= C1;
if (quotient & 2)
cc |= C3;
if (quotient & 4)
cc |= C0;
}
setcc(cc);
}
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem1) : (x87_timings.fprem1 * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem1) : (x87_concurrency.fprem1 * cpu_multi));
return 0;
}
static int
sf_FPREM(uint32_t fetchdat)
{
floatx80 a, b, result;
struct float_status_t status;
uint64_t quotient = 0;
int flags, cc;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
a = FPU_read_regi(0);
b = FPU_read_regi(1);
    // handle unsupported extended double-precision floating-point encodings
flags = floatx80_remainder(a, b, &result, &quotient, &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
if (flags >= 0) {
cc = 0;
if (flags)
cc = C2;
else {
if (quotient & 1)
cc |= C1;
if (quotient & 2)
cc |= C3;
if (quotient & 4)
cc |= C0;
}
setcc(cc);
}
FPU_save_regi(result, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem) : (x87_timings.fprem * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem) : (x87_concurrency.fprem * cpu_multi));
return 0;
}
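
FPREM reduces with a truncating quotient (like fmod) while FPREM1 uses the IEEE round-to-nearest quotient (like remainder); when reduction is complete, both report the low three quotient bits through the condition codes exactly as in the handlers above: Q0 -> C1, Q1 -> C3, Q2 -> C0. A host-side sketch of the two reductions and of that mapping; the C0..C3 values follow the i387 status word.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define C0 0x0100
#define C1 0x0200
#define C2 0x0400
#define C3 0x4000

static uint16_t
quotient_cc(uint64_t q)
{
    uint16_t cc = 0;
    if (q & 1)
        cc |= C1;
    if (q & 2)
        cc |= C3;
    if (q & 4)
        cc |= C0;
    return cc;
}

int
main(void)
{
    double a = 17.0;
    double b = 6.0;

    double   r_fprem  = fmod(a, b);         /* truncating quotient 2 -> remainder  5 */
    double   r_fprem1 = remainder(a, b);    /* nearest quotient 3    -> remainder -1 */
    uint64_t q        = (uint64_t) (a / b); /* truncated quotient                    */

    printf("FPREM=%g FPREM1=%g cc=%04x\n", r_fprem, r_fprem1, quotient_cc(q));
    return 0;
}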
static int
sf_FYL2XP1(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 1, 1);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
result = fyl2xp1(FPU_read_regi(0), FPU_read_regi(1), &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(result, 1);
FPU_pop();
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2xp1) : (x87_timings.fyl2xp1 * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fyl2xp1) : (x87_concurrency.fyl2xp1 * cpu_multi));
return 0;
}
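
FYL2XP1 exists because forming 1+x explicitly destroys the low-order bits of a tiny x before the logarithm is taken. The same effect is visible with the host libm, comparing a naive log2(1+x) against log1p(x)/ln 2; this is illustrative only, not the fyl2xp1() code.

#include <math.h>
#include <stdio.h>

#ifndef M_LN2
#    define M_LN2 0.69314718055994530942
#endif

int
main(void)
{
    double x = 1e-18; /* far below 2^-53, so 1.0 + x rounds to exactly 1.0 */

    double naive   = log2(1.0 + x);    /* collapses to 0                 */
    double careful = log1p(x) / M_LN2; /* keeps x's contribution         */

    printf("naive=%g careful=%g\n", naive, careful);
    return 0;
}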
#ifndef FPU_8087
static int
sf_FSINCOS(uint32_t fetchdat)
{
const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
struct float_status_t status;
floatx80 y, sin_y, cos_y;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) {
if (IS_TAG_EMPTY(0))
FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0);
else
FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0);
/* The masked response */
if (is_IA_masked()) {
FPU_save_regi(floatx80_default_nan, 0);
FPU_push();
FPU_save_regi(floatx80_default_nan, 0);
}
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
y = FPU_read_regi(0);
if (fsincos(y, &sin_y, &cos_y, &status) == -1) {
fpu_state.swd |= C2;
goto next_ins;
}
if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) {
FPU_save_regi(sin_y, 0);
FPU_push();
FPU_save_regi(cos_y, 0);
}
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsincos) : (x87_timings.fsincos * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsincos) : (x87_concurrency.fsincos * cpu_multi));
return 0;
}
#endif
static int
sf_FSCALE(uint32_t fetchdat)
{
floatx80 result;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word());
result = floatx80_scale(FPU_read_regi(0), FPU_read_regi(1), &status);
if (! FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(result, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fscale) : (x87_timings.fscale * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fscale) : (x87_concurrency.fscale * cpu_multi));
return 0;
}
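
FSCALE multiplies ST(0) by 2 raised to ST(1) truncated toward zero, which is how the integer part of an exponent is applied after F2XM1, or how an FXTRACT split is undone. The host-side equivalent is ldexp() with a truncated argument; values here are arbitrary.

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double st0 = 1.5;
    double st1 = 4.7;

    double scaled = ldexp(st0, (int) trunc(st1)); /* 1.5 * 2^4 = 24 */

    printf("%g\n", scaled);
    return 0;
}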
#ifndef FPU_8087
static int
sf_FSIN(uint32_t fetchdat)
{
floatx80 y;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
y = FPU_read_regi(0);
if (fsin(&y, &status) == -1) {
fpu_state.swd |= C2;
goto next_ins;
}
if (! FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(y, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi));
return 0;
}
static int
sf_FCOS(uint32_t fetchdat)
{
floatx80 y;
struct float_status_t status;
FP_ENTER();
cpu_state.pc++;
clear_C1();
clear_C2();
if (IS_TAG_EMPTY(0)) {
FPU_stack_underflow(fetchdat, 0, 0);
goto next_ins;
}
status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS);
y = FPU_read_regi(0);
if (fcos(&y, &status) == -1) {
fpu_state.swd |= C2;
goto next_ins;
}
if (! FPU_exception(fetchdat, status.float_exception_flags, 0))
FPU_save_regi(y, 0);
next_ins:
CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi));
CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi));
return 0;
}
#endif