mirror of
https://github.com/qemu/qemu.git
synced 2026-04-05 21:50:33 +00:00
fpu: Add conversion routines for OCP FP8 E4M3
Reviewed-by: Chao Liu <chao.liu.zevorn@gmail.com>
Signed-off-by: Max Chou <max.chou@sifive.com>
[rth: Split out of a larger patch; adjust overflow detection.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
committed by
Richard Henderson
parent
d8be495376
commit
27e989f99c
@@ -242,6 +242,15 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
|
||||
return;
|
||||
case float_expmax_normal:
|
||||
break;
|
||||
case float_expmax_e4m3:
|
||||
if (p->frac_hi == 0b111) {
|
||||
frac_shl(p, fmt->frac_shift);
|
||||
p->cls = (parts_is_snan_frac(p->frac_hi, status)
|
||||
? float_class_snan : float_class_qnan);
|
||||
return;
|
||||
}
|
||||
/* otherwise normal */
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
@@ -262,6 +271,21 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
|
||||
* The saturate parameter controls saturation behavior for formats that
|
||||
* support it -- when true, overflow produces max normal instead of infinity.
|
||||
*/
|
||||
|
||||
/* Helper for uncanon_normal and uncanon, for FP8 E4M3. */
|
||||
static void partsN(uncanon_e4m3_overflow)(FloatPartsN *p, float_status *s,
|
||||
const FloatFmt *fmt, bool saturate)
|
||||
{
|
||||
assert(N == 64);
|
||||
float_raise(float_flag_overflow | float_flag_inexact, s);
|
||||
if (saturate) {
|
||||
p->exp = fmt->exp_max;
|
||||
p->frac_hi = E4M3_NORMAL_FRAC_MAX;
|
||||
} else {
|
||||
parts_default_nan(p, s);
|
||||
}
|
||||
}
|
||||
|
||||
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
|
||||
const FloatFmt *fmt, bool saturate)
|
||||
{
|
||||
@@ -360,6 +384,12 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
|
||||
}
|
||||
break;
|
||||
|
||||
case float_expmax_e4m3:
|
||||
if (exp > exp_max || p->frac_hi > E4M3_NORMAL_FRAC_MAX) {
|
||||
partsN(uncanon_e4m3_overflow)(p, s, fmt, overflow_norm);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
@@ -459,9 +489,18 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
|
||||
frac_clear(p);
|
||||
return;
|
||||
case float_class_inf:
|
||||
assert(fmt->exp_max_kind == float_expmax_ieee);
|
||||
p->exp = fmt->exp_max;
|
||||
frac_clear(p);
|
||||
switch (fmt->exp_max_kind) {
|
||||
case float_expmax_ieee:
|
||||
p->exp = fmt->exp_max;
|
||||
frac_clear(p);
|
||||
break;
|
||||
case float_expmax_e4m3:
|
||||
partsN(uncanon_e4m3_overflow)(p, s, fmt, saturate);
|
||||
break;
|
||||
case float_expmax_normal:
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
return;
|
||||
case float_class_qnan:
|
||||
case float_class_snan:
|
||||
|
||||
@@ -528,6 +528,8 @@ typedef enum __attribute__((__packed__)) {
|
||||
float_expmax_ieee,
|
||||
/* exp==max is a normal number; no infinity or nan representation. */
|
||||
float_expmax_normal,
|
||||
/* exp==max, frac==max ? nan : normal; no infinity representation. */
|
||||
float_expmax_e4m3,
|
||||
} FloatFmtExpMaxKind;
|
||||
|
||||
/*
|
||||
@@ -572,6 +574,14 @@ typedef struct {
|
||||
.frac_shift = (-F - 1) & 63, \
|
||||
.round_mask = (1ull << ((-F - 1) & 63)) - 1
|
||||
|
||||
static const FloatFmt float8_e4m3_params = {
|
||||
FLOAT_PARAMS(4, 3),
|
||||
.exp_max_kind = float_expmax_e4m3
|
||||
};
|
||||
|
||||
/* 110 << frac_shift, with the implicit bit set */
|
||||
#define E4M3_NORMAL_FRAC_MAX 0xe000000000000000ull
|
||||
|
||||
static const FloatFmt float8_e5m2_params = {
|
||||
FLOAT_PARAMS(5, 2)
|
||||
};
|
||||
@@ -631,6 +641,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
|
||||
};
|
||||
}
|
||||
|
||||
/* Unpack the raw E4M3 encoding @f into @p using the E4M3 format parameters. */
static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
{
    const FloatFmt *fmt = &float8_e4m3_params;

    unpack_raw64(p, fmt, f);
}
|
||||
|
||||
static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
|
||||
{
|
||||
unpack_raw64(p, &float8_e5m2_params, f);
|
||||
@@ -693,6 +708,11 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Pack @p into a raw E4M3 encoding using the E4M3 format parameters. */
static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p)
{
    const FloatFmt *fmt = &float8_e4m3_params;

    return pack_raw64(p, fmt);
}
|
||||
|
||||
static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
|
||||
{
|
||||
return pack_raw64(p, &float8_e5m2_params);
|
||||
@@ -1689,6 +1709,13 @@ static const uint16_t rsqrt_tab[128] = {
|
||||
* Pack/unpack routines with a specific FloatFmt.
|
||||
*/
|
||||
|
||||
static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
|
||||
float_status *s)
|
||||
{
|
||||
float8_e4m3_unpack_raw(p, f);
|
||||
parts_canonicalize(p, s, &float8_e4m3_params);
|
||||
}
|
||||
|
||||
static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
|
||||
float_status *s)
|
||||
{
|
||||
@@ -1716,6 +1743,14 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
|
||||
parts_canonicalize(p, s, &bfloat16_params);
|
||||
}
|
||||
|
||||
static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p,
|
||||
float_status *s,
|
||||
bool saturate)
|
||||
{
|
||||
parts_uncanon(p, s, &float8_e4m3_params, saturate);
|
||||
return float8_e4m3_pack_raw(p);
|
||||
}
|
||||
|
||||
static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
|
||||
float_status *s,
|
||||
bool saturate)
|
||||
@@ -2894,6 +2929,15 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Convert the OCP FP8 E4M3 value @a to bfloat16: unpack to canonical
 * parts, run the float-to-float conversion step, then round and pack
 * as bfloat16.
 */
bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
{
    FloatParts64 parts;

    float8_e4m3_unpack_canonical(&parts, a, s);
    parts_float_to_float(&parts, s);

    return bfloat16_round_pack_canonical(&parts, s);
}
|
||||
|
||||
bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
|
||||
{
|
||||
FloatParts64 p;
|
||||
@@ -2923,6 +2967,15 @@ float64 float16_to_float64(float16 a, bool ieee, float_status *s)
|
||||
return float64_round_pack_canonical(&p, s);
|
||||
}
|
||||
|
||||
/*
 * Convert the float32 value @a to OCP FP8 E4M3: unpack to canonical
 * parts, run the float-to-float conversion step, then round and pack
 * as E4M3.  @saturate is forwarded to the E4M3 round-and-pack step.
 */
float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s)
{
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, s);
    parts_float_to_float(&parts, s);

    return float8_e4m3_round_pack_canonical(&parts, s, saturate);
}
|
||||
|
||||
float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
|
||||
{
|
||||
FloatParts64 p;
|
||||
@@ -2999,6 +3052,15 @@ float32 float64_to_float32(float64 a, float_status *s)
|
||||
return float32_round_pack_canonical(&p, s);
|
||||
}
|
||||
|
||||
/*
 * Convert the bfloat16 value @a to OCP FP8 E4M3: unpack to canonical
 * parts, run the float-to-float conversion step, then round and pack
 * as E4M3.  @saturate is forwarded to the E4M3 round-and-pack step.
 */
float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s)
{
    FloatParts64 parts;

    bfloat16_unpack_canonical(&parts, a, s);
    parts_float_to_float(&parts, s);

    return float8_e4m3_round_pack_canonical(&parts, s, saturate);
}
|
||||
|
||||
float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
|
||||
{
|
||||
FloatParts64 p;
|
||||
|
||||
@@ -122,6 +122,7 @@ typedef uint16_t bfloat16;
|
||||
/*
|
||||
* Open Compute Project (OCP) Microscaling Formats
|
||||
*/
|
||||
/* FP8 E4M3 raw encoding, held in one byte. */
typedef uint8_t float8_e4m3;
|
||||
typedef uint8_t float8_e5m2;
|
||||
|
||||
/*
|
||||
|
||||
@@ -193,6 +193,10 @@ float128 uint128_to_float128(Int128, float_status *status);
|
||||
| OCP FP8 conversion routines.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
/* Convert an FP8 E4M3 value to bfloat16. */
bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
|
||||
/*
 * Convert a bfloat16 value to FP8 E4M3.  On overflow, sat selects the
 * maximum normal value; otherwise the result is the default NaN.
 */
float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool sat, float_status *status);
|
||||
/*
 * Convert a float32 value to FP8 E4M3.  On overflow, sat selects the
 * maximum normal value; otherwise the result is the default NaN.
 */
float8_e4m3 float32_to_float8_e4m3(float32, bool sat, float_status *status);
|
||||
|
||||
bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
|
||||
float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status *status);
|
||||
float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status);
|
||||
|
||||
Reference in New Issue
Block a user