x86-specific FADD (SSE2 version)

This commit is contained in:
Cacodemon345
2025-06-30 02:08:48 +06:00
parent 96734590ca
commit b44042ef1c
2 changed files with 28 additions and 34 deletions

View File

@@ -22,6 +22,20 @@
*/ */
#include <math.h> #include <math.h>
#include <fenv.h> #include <fenv.h>
/*
 * Define X87_INLINE_ASM when the build targets an x86-family CPU.
 *
 * With MSVC proper (i.e. _MSC_VER defined and __clang__ not defined) only
 * the 32-bit x86 macros are tested. Every other compiler additionally
 * accepts the 64-bit macros _M_X64 and __amd64__.
 *
 * NOTE(review): presumably the MSVC branch leaves out x64 because that
 * compiler offers no inline assembler for 64-bit targets -- confirm.
 */
#if defined(_MSC_VER) && !defined(__clang__)
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
# define X87_INLINE_ASM
# endif
#else
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__
# define X87_INLINE_ASM
# endif
#endif
#ifdef X87_INLINE_ASM
#include <immintrin.h>
#endif
#include "x87_timings.h" #include "x87_timings.h"
#ifdef _MSC_VER #ifdef _MSC_VER
# include <intrin.h> # include <intrin.h>
@@ -64,16 +78,6 @@ typedef union {
}; };
} double_decompose_t; } double_decompose_t;
/*
 * Define X87_INLINE_ASM when the build targets an x86-family CPU.
 *
 * With MSVC proper (i.e. _MSC_VER defined and __clang__ not defined) only
 * the 32-bit x86 macros are tested. Every other compiler additionally
 * accepts the 64-bit macros _M_X64 and __amd64__.
 *
 * NOTE(review): presumably the MSVC branch leaves out x64 because that
 * compiler offers no inline assembler for 64-bit targets -- confirm.
 */
#if defined(_MSC_VER) && !defined(__clang__)
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
# define X87_INLINE_ASM
# endif
#else
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__
# define X87_INLINE_ASM
# endif
#endif
#ifdef FPU_8087 #ifdef FPU_8087
# define x87_div(dst, src1, src2) \ # define x87_div(dst, src1, src2) \
do { \ do { \

View File

@@ -2,33 +2,23 @@
#ifdef X87_INLINE_ASM #ifdef X87_INLINE_ASM
/*
 * Add two doubles under an explicitly selected rounding mode, using SSE2
 * scalar arithmetic.
 *
 * src, val  The two addends.
 * round     Rounding selector:
 *             0 = to nearest, 1 = toward -infinity (down),
 *             2 = toward +infinity (up), 3 = toward zero.
 *           Any other value leaves the MXCSR rounding mode that is active
 *           on entry in effect for the addition.
 *
 * Returns src + val rounded as requested. The MXCSR rounding mode found on
 * entry is restored before returning.
 */
static inline double float_add(double src, double val, int round)
{
    /*
     * The operands and the result go through volatile objects so that the
     * loads, the add and the store stay between the two MXCSR writes.
     * Compilers generally do not treat the rounding-control bits as an input
     * of floating-point arithmetic, so without this the add could be
     * hoisted, sunk or constant-folded under the wrong rounding mode.
     */
    volatile double    lhs = src;
    volatile double    rhs = val;
    volatile double    sum;
    const unsigned int saved_mode = _MM_GET_ROUNDING_MODE();

    switch (round) {
        case 0:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
            break;
        case 1:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
            break;
        case 2:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_UP);
            break;
        case 3:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
            break;
        default:
            /* Unknown selector: keep whatever mode is currently active. */
            break;
    }

    sum = _mm_cvtsd_f64(_mm_add_sd(_mm_set_sd(lhs), _mm_set_sd(rhs)));

    /* Put the caller's rounding mode back before handing out the result. */
    _MM_SET_ROUNDING_MODE(saved_mode);
    return sum;
}
#define DO_FADD(use_var) \ #define DO_FADD(use_var) \