Add FLAC__SSE_SUPPORTED and FLAC__SSE2_SUPPORTED flags.

* Allow compiling using GCC GCC w/o SSE support.
* Allow SSE4.1 intrinsic functions to be enabled.

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
Erik de Castro Lopo
2014-01-30 21:49:51 +11:00
parent c2747bec1c
commit d40e986a1e
11 changed files with 87 additions and 26 deletions

View File

@@ -39,6 +39,47 @@
#include <config.h>
#endif
/* SSE intrinsics support by ICC/MSVC/GCC */
#if defined __INTEL_COMPILER
#define FLAC__SSE_TARGET(x)
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#if (__INTEL_COMPILER >= 1000) /* Intel C++ Compiler 10.0 */
#define FLAC__SSSE3_SUPPORTED 1
#define FLAC__SSE4_1_SUPPORTED 1
#endif
#elif defined _MSC_VER
#define FLAC__SSE_TARGET(x)
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#if (_MSC_VER >= 1500) /* MS Visual Studio 2008 */
#define FLAC__SSSE3_SUPPORTED 1
#define FLAC__SSE4_1_SUPPORTED 1
#endif
#elif defined __GNUC__
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* since GCC 4.9 -msse.. compiler options aren't necessary */
#define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#define FLAC__SSSE3_SUPPORTED 1
#define FLAC__SSE4_1_SUPPORTED 1
#else /* for GCC older than 4.9 */
#define FLAC__SSE_TARGET(x)
#ifdef __SSE__
#define FLAC__SSE_SUPPORTED 1
#endif
#ifdef __SSE2__
#define FLAC__SSE2_SUPPORTED 1
#endif
#ifdef __SSSE3__
#define FLAC__SSSE3_SUPPORTED 1
#endif
#ifdef __SSE4_1__
#define FLAC__SSE4_1_SUPPORTED 1
#endif
#endif /* GCC version */
#endif /* compiler version */
typedef enum {
FLAC__CPUINFO_TYPE_IA32,
FLAC__CPUINFO_TYPE_X86_64,

View File

@@ -37,6 +37,7 @@
#include <config.h>
#endif
#include "private/cpu.h"
#include "private/float.h"
#include "FLAC/format.h"
@@ -80,10 +81,12 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], u
# endif
# endif
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE_SUPPORTED
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
# endif
# endif
#endif
@@ -156,9 +159,11 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__
# endif
# endif
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
# ifdef FLAC__SSE4_SUPPORTED
# endif
# ifdef FLAC__SSE4_1_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
# endif
# endif
@@ -195,9 +200,9 @@ void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], u
void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE4_SUPPORTED
# ifdef FLAC__SSE4_1_SUPPORTED
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
# endif
# endif
# endif
#endif /* FLAC__NO_ASM */

View File

@@ -38,11 +38,13 @@
#endif
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "share/compat.h"
#include "private/cpu.h"
#include "FLAC/format.h"
#ifdef FLAC__SSE2_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
#endif
#ifdef FLAC__SSSE3_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],