x86-specific FADD (SSE2 version)
This commit is contained in:
@@ -22,6 +22,20 @@
|
||||
*/
|
||||
#include <math.h>
|
||||
#include <fenv.h>
|
||||
/*
 * Decide whether the inline-assembly / intrinsic helpers are compiled by
 * defining X87_INLINE_ASM:
 *   - MSVC proper (_MSC_VER set, not clang): only for 32-bit x86 targets.
 *     NOTE(review): presumably because MSVC offers no inline assembler on
 *     x64 -- confirm.
 *   - every other compiler: for 32-bit x86 and for x86-64 targets.
 * When the macro is left undefined, code guarded by `#ifdef X87_INLINE_ASM`
 * (the <immintrin.h> include and float_add) is not compiled.
 */
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
|
||||
# define X87_INLINE_ASM
|
||||
# endif
|
||||
#else
|
||||
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__
|
||||
# define X87_INLINE_ASM
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef X87_INLINE_ASM
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "x87_timings.h"
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
@@ -64,16 +78,6 @@ typedef union {
|
||||
};
|
||||
} double_decompose_t;
|
||||
|
||||
/*
 * Define X87_INLINE_ASM on targets where the inline-assembly helpers may be
 * built: under MSVC proper (not clang) only for 32-bit x86; under any other
 * compiler for 32-bit x86 and x86-64.
 */
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
|
||||
# define X87_INLINE_ASM
|
||||
# endif
|
||||
#else
|
||||
# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__
|
||||
# define X87_INLINE_ASM
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef FPU_8087
|
||||
# define x87_div(dst, src1, src2) \
|
||||
do { \
|
||||
|
||||
@@ -2,33 +2,23 @@
|
||||
#ifdef X87_INLINE_ASM
|
||||
/*
 * Add two doubles with SSE2 scalar arithmetic under a caller-selected
 * rounding mode.
 *
 * src, val - the two addends.
 * round    - rounding control: 0 = to nearest, 1 = down (toward -inf),
 *            2 = up (toward +inf), 3 = toward zero. Any other value leaves
 *            the rounding mode currently held in MXCSR in effect.
 *
 * Returns src + val rounded according to `round`. The MXCSR rounding mode
 * found on entry is restored before returning.
 */
static inline double float_add(double src, double val, int round)
{
    const unsigned int rounding_mode_orig = _MM_GET_ROUNDING_MODE();
    __m128d xmm_src = _mm_load_sd(&src);
    __m128d xmm_dst = _mm_load_sd(&val);
    __m128d xmm_res;

    switch (round) {
        case 0:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
            break;
        case 1:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
            break;
        case 2:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_UP);
            break;
        case 3:
            _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
            break;
        default:
            /* Unknown selector: keep whatever mode is already active. */
            break;
    }

    /*
     * The compiler does not model MXCSR as an input of the addition, so it
     * could constant-fold the add or move it outside the mode switch. The
     * empty asm statements make the operands and the result opaque, pinning
     * the add between the two MXCSR writes.
     */
    __asm volatile("" : "+x"(xmm_src), "+x"(xmm_dst) : : "memory");
    xmm_res = _mm_add_sd(xmm_src, xmm_dst);
    __asm volatile("" : "+x"(xmm_res) : : "memory");

    _MM_SET_ROUNDING_MODE(rounding_mode_orig);

    return _mm_cvtsd_f64(xmm_res);
}
|
||||
|
||||
#define DO_FADD(use_var) \
|
||||
|
||||
Reference in New Issue
Block a user