mirror of
https://github.com/claunia/flac.git
synced 2025-12-16 18:54:26 +00:00
Add FLAC__SSE_SUPPORTED and FLAC__SSE2_SUPPORTED flags.
* Allow compiling using GCC w/o SSE support. * Allow SSE4.1 intrinsic functions to be enabled. Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
@@ -199,12 +199,4 @@ int flac_snprintf(char *str, size_t size, const char *fmt, ...);
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* SSSE3, SSE4 support: MSVS 2008, GCC 4.3 -- currently disabled, Intel Compiler 10.0 */
|
|
||||||
#if ( defined _MSC_VER && _MSC_VER >= 1500 ) \
|
|
||||||
|| ( 0 && defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) ) \
|
|
||||||
|| ( defined __INTEL_COMPILER && __INTEL_COMPILER >= 1000 )
|
|
||||||
#define FLAC__SSSE3_SUPPORTED 1
|
|
||||||
#define FLAC__SSE4_SUPPORTED 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* FLAC__SHARE__COMPAT_H */
|
#endif /* FLAC__SHARE__COMPAT_H */
|
||||||
|
|||||||
@@ -39,6 +39,47 @@
|
|||||||
#include <config.h>
|
#include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* SSE intrinsics support by ICC/MSVC/GCC */
|
||||||
|
#if defined __INTEL_COMPILER
|
||||||
|
#define FLAC__SSE_TARGET(x)
|
||||||
|
#define FLAC__SSE_SUPPORTED 1
|
||||||
|
#define FLAC__SSE2_SUPPORTED 1
|
||||||
|
#if (__INTEL_COMPILER >= 1000) /* Intel C++ Compiler 10.0 */
|
||||||
|
#define FLAC__SSSE3_SUPPORTED 1
|
||||||
|
#define FLAC__SSE4_1_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#elif defined _MSC_VER
|
||||||
|
#define FLAC__SSE_TARGET(x)
|
||||||
|
#define FLAC__SSE_SUPPORTED 1
|
||||||
|
#define FLAC__SSE2_SUPPORTED 1
|
||||||
|
#if (_MSC_VER >= 1500) /* MS Visual Studio 2008 */
|
||||||
|
#define FLAC__SSSE3_SUPPORTED 1
|
||||||
|
#define FLAC__SSE4_1_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#elif defined __GNUC__
|
||||||
|
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* since GCC 4.9 -msse.. compiler options aren't necessary */
|
||||||
|
#define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
|
||||||
|
#define FLAC__SSE_SUPPORTED 1
|
||||||
|
#define FLAC__SSE2_SUPPORTED 1
|
||||||
|
#define FLAC__SSSE3_SUPPORTED 1
|
||||||
|
#define FLAC__SSE4_1_SUPPORTED 1
|
||||||
|
#else /* for GCC older than 4.9 */
|
||||||
|
#define FLAC__SSE_TARGET(x)
|
||||||
|
#ifdef __SSE__
|
||||||
|
#define FLAC__SSE_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE2__
|
||||||
|
#define FLAC__SSE2_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#ifdef __SSSE3__
|
||||||
|
#define FLAC__SSSE3_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE4_1__
|
||||||
|
#define FLAC__SSE4_1_SUPPORTED 1
|
||||||
|
#endif
|
||||||
|
#endif /* GCC version */
|
||||||
|
#endif /* compiler version */
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
FLAC__CPUINFO_TYPE_IA32,
|
FLAC__CPUINFO_TYPE_IA32,
|
||||||
FLAC__CPUINFO_TYPE_X86_64,
|
FLAC__CPUINFO_TYPE_X86_64,
|
||||||
|
|||||||
@@ -37,6 +37,7 @@
|
|||||||
#include <config.h>
|
#include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "private/cpu.h"
|
||||||
#include "private/float.h"
|
#include "private/float.h"
|
||||||
#include "FLAC/format.h"
|
#include "FLAC/format.h"
|
||||||
|
|
||||||
@@ -80,11 +81,13 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], u
|
|||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
# ifdef FLAC__SSE_SUPPORTED
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||||
# endif
|
# endif
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -156,9 +159,11 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__
|
|||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
||||||
# ifdef FLAC__SSE4_SUPPORTED
|
# endif
|
||||||
|
# ifdef FLAC__SSE4_1_SUPPORTED
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
@@ -195,7 +200,7 @@ void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], u
|
|||||||
void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */
|
# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */
|
||||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
# ifdef FLAC__SSE4_SUPPORTED
|
# ifdef FLAC__SSE4_1_SUPPORTED
|
||||||
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
|||||||
@@ -38,11 +38,13 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
#include "share/compat.h"
|
#include "private/cpu.h"
|
||||||
#include "FLAC/format.h"
|
#include "FLAC/format.h"
|
||||||
|
|
||||||
|
#ifdef FLAC__SSE2_SUPPORTED
|
||||||
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||||
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
|
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FLAC__SSSE3_SUPPORTED
|
#ifdef FLAC__SSSE3_SUPPORTED
|
||||||
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||||
|
|||||||
@@ -37,13 +37,15 @@
|
|||||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
#include "private/lpc.h"
|
||||||
|
#ifdef FLAC__SSE_SUPPORTED
|
||||||
|
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
#include "FLAC/format.h"
|
#include "FLAC/format.h"
|
||||||
#include "private/lpc.h"
|
|
||||||
|
|
||||||
#include <xmmintrin.h> /* SSE */
|
#include <xmmintrin.h> /* SSE */
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse")
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||||
{
|
{
|
||||||
__m128 xmm0, xmm2, xmm5;
|
__m128 xmm0, xmm2, xmm5;
|
||||||
@@ -80,6 +82,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[],
|
|||||||
_mm_storeu_ps(autoc, xmm5);
|
_mm_storeu_ps(autoc, xmm5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse")
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||||
{
|
{
|
||||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6;
|
__m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6;
|
||||||
@@ -125,6 +128,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[],
|
|||||||
_mm_storeu_ps(autoc+4, xmm6);
|
_mm_storeu_ps(autoc+4, xmm6);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse")
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||||
{
|
{
|
||||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||||
@@ -178,6 +182,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[]
|
|||||||
_mm_storeu_ps(autoc+8, xmm7);
|
_mm_storeu_ps(autoc+8, xmm7);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse")
|
||||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||||
{
|
{
|
||||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9;
|
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9;
|
||||||
@@ -241,6 +246,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[]
|
|||||||
_mm_storeu_ps(autoc+12,xmm9);
|
_mm_storeu_ps(autoc+12,xmm9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif /* FLAC__SSE_SUPPORTED */
|
||||||
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
||||||
#endif /* FLAC__NO_ASM */
|
#endif /* FLAC__NO_ASM */
|
||||||
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
||||||
|
|||||||
@@ -37,13 +37,15 @@
|
|||||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
#include "private/lpc.h"
|
||||||
|
#ifdef FLAC__SSE2_SUPPORTED
|
||||||
|
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
#include "FLAC/format.h"
|
#include "FLAC/format.h"
|
||||||
#include "private/lpc.h"
|
|
||||||
|
|
||||||
#include <emmintrin.h> /* SSE2 */
|
#include <emmintrin.h> /* SSE2 */
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse2")
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -787,6 +789,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC_
|
|||||||
|
|
||||||
#define RESIDUAL_RESULT(xmmN) residual[i] = data[i] - (_mm_cvtsi128_si32(xmmN) >> lp_quantization);
|
#define RESIDUAL_RESULT(xmmN) residual[i] = data[i] - (_mm_cvtsi128_si32(xmmN) >> lp_quantization);
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse2")
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -1313,6 +1316,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif /* FLAC__SSE2_SUPPORTED */
|
||||||
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
||||||
#endif /* FLAC__NO_ASM */
|
#endif /* FLAC__NO_ASM */
|
||||||
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
||||||
|
|||||||
@@ -34,16 +34,14 @@
|
|||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "share/compat.h"
|
|
||||||
|
|
||||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
#ifdef FLAC__SSE4_SUPPORTED
|
#include "private/lpc.h"
|
||||||
|
#ifdef FLAC__SSE4_1_SUPPORTED
|
||||||
|
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
#include "FLAC/format.h"
|
#include "FLAC/format.h"
|
||||||
#include "private/lpc.h"
|
|
||||||
|
|
||||||
#include <smmintrin.h> /* SSE4.1 */
|
#include <smmintrin.h> /* SSE4.1 */
|
||||||
|
|
||||||
@@ -68,6 +66,7 @@
|
|||||||
#define DATA_RESULT(xmmN) data[i] = residual[i] + (FLAC__int32)(_mm_cvtsi128_si64(xmmN) >> lp_quantization);
|
#define DATA_RESULT(xmmN) data[i] = residual[i] + (FLAC__int32)(_mm_cvtsi128_si64(xmmN) >> lp_quantization);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse4.1")
|
||||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -594,6 +593,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FL
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse4.1")
|
||||||
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
|
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -1120,7 +1120,7 @@ void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], un
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* FLAC__SSE4_SUPPORTED */
|
#endif /* FLAC__SSE4_1_SUPPORTED */
|
||||||
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
||||||
#endif /* FLAC__NO_ASM */
|
#endif /* FLAC__NO_ASM */
|
||||||
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
||||||
|
|||||||
@@ -417,7 +417,7 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef FLAC__HAS_X86INTRIN
|
#ifdef FLAC__HAS_X86INTRIN
|
||||||
# if defined FLAC__SSE4_SUPPORTED && 0 /* now we have FLAC__lpc_restore_signal_wide_asm_ia32() which is slightly faster */
|
# if defined FLAC__SSE4_1_SUPPORTED && 0 /* now we have FLAC__lpc_restore_signal_wide_asm_ia32() which is slightly faster */
|
||||||
if(decoder->private_->cpuinfo.ia32.sse41)
|
if(decoder->private_->cpuinfo.ia32.sse41)
|
||||||
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
|
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
|
||||||
# endif
|
# endif
|
||||||
|
|||||||
@@ -920,11 +920,13 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
|
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
|
||||||
# endif /* FLAC__HAS_NASM */
|
# endif /* FLAC__HAS_NASM */
|
||||||
# ifdef FLAC__HAS_X86INTRIN
|
# ifdef FLAC__HAS_X86INTRIN
|
||||||
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
if(encoder->private_->cpuinfo.ia32.sse2) {
|
if(encoder->private_->cpuinfo.ia32.sse2) {
|
||||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
||||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||||
}
|
}
|
||||||
# ifdef FLAC__SSE4_SUPPORTED
|
# endif
|
||||||
|
# ifdef FLAC__SSE4_1_SUPPORTED
|
||||||
if(encoder->private_->cpuinfo.ia32.sse41)
|
if(encoder->private_->cpuinfo.ia32.sse41)
|
||||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
|
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
|
||||||
# endif
|
# endif
|
||||||
@@ -932,6 +934,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||||||
# elif defined FLAC__CPU_X86_64
|
# elif defined FLAC__CPU_X86_64
|
||||||
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
|
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
|
||||||
# ifdef FLAC__HAS_X86INTRIN
|
# ifdef FLAC__HAS_X86INTRIN
|
||||||
|
# ifdef FLAC__SSE_SUPPORTED
|
||||||
if(encoder->protected_->max_lpc_order < 4)
|
if(encoder->protected_->max_lpc_order < 4)
|
||||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
|
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
|
||||||
else if(encoder->protected_->max_lpc_order < 8)
|
else if(encoder->protected_->max_lpc_order < 8)
|
||||||
@@ -940,9 +943,11 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
|
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
|
||||||
else if(encoder->protected_->max_lpc_order < 16)
|
else if(encoder->protected_->max_lpc_order < 16)
|
||||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
|
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
|
||||||
|
# endif
|
||||||
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
||||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||||
|
# endif
|
||||||
# endif /* FLAC__HAS_X86INTRIN */
|
# endif /* FLAC__HAS_X86INTRIN */
|
||||||
# endif /* FLAC__CPU_... */
|
# endif /* FLAC__CPU_... */
|
||||||
}
|
}
|
||||||
@@ -956,15 +961,19 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||||||
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
|
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
|
||||||
else
|
else
|
||||||
# endif
|
# endif
|
||||||
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
if(encoder->private_->cpuinfo.ia32.sse2)
|
if(encoder->private_->cpuinfo.ia32.sse2)
|
||||||
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
|
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
|
||||||
|
# endif
|
||||||
# elif defined FLAC__CPU_X86_64
|
# elif defined FLAC__CPU_X86_64
|
||||||
# ifdef FLAC__SSSE3_SUPPORTED
|
# ifdef FLAC__SSSE3_SUPPORTED
|
||||||
if(encoder->private_->cpuinfo.x86_64.ssse3)
|
if(encoder->private_->cpuinfo.x86_64.ssse3)
|
||||||
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
|
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
|
||||||
else
|
else
|
||||||
# endif
|
# endif
|
||||||
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
|
encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
|
||||||
|
# endif
|
||||||
# endif /* FLAC__CPU_... */
|
# endif /* FLAC__CPU_... */
|
||||||
}
|
}
|
||||||
#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
|
#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
|
||||||
|
|||||||
@@ -36,12 +36,14 @@
|
|||||||
|
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
#include "private/stream_encoder.h"
|
||||||
|
#ifdef FLAC__SSE2_SUPPORTED
|
||||||
|
|
||||||
#include <stdlib.h> /* for abs() */
|
#include <stdlib.h> /* for abs() */
|
||||||
#include <emmintrin.h> /* SSE2 */
|
#include <emmintrin.h> /* SSE2 */
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
#include "private/stream_encoder.h"
|
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("sse2")
|
||||||
void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||||
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
|
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
|
||||||
{
|
{
|
||||||
@@ -157,5 +159,6 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif /* FLAC__SSE2_SUPPORTED */
|
||||||
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
||||||
#endif /* FLAC__NO_ASM */
|
#endif /* FLAC__NO_ASM */
|
||||||
|
|||||||
@@ -34,17 +34,16 @@
|
|||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "share/compat.h"
|
|
||||||
|
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
|
#include "private/stream_encoder.h"
|
||||||
#ifdef FLAC__SSSE3_SUPPORTED
|
#ifdef FLAC__SSSE3_SUPPORTED
|
||||||
|
|
||||||
#include <stdlib.h> /* for abs() */
|
#include <stdlib.h> /* for abs() */
|
||||||
#include <tmmintrin.h> /* SSSE3 */
|
#include <tmmintrin.h> /* SSSE3 */
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
#include "private/stream_encoder.h"
|
|
||||||
|
|
||||||
|
FLAC__SSE_TARGET("ssse3")
|
||||||
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||||
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
|
unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user