mirror of
https://github.com/claunia/flac.git
synced 2025-12-16 18:54:26 +00:00
Improve encoding speed on older Intel CPUs.
The commit http://git.xiph.org/?p=flac.git;a=commit;h=e9d805dd4374 changed the functions that calculate autocorrelation. However, the new code worked slightly (about 4%) slower on Core 2, but with the new presets the speed decrease can reach ~25%. This patch enables both old and new functions and chooses between them at runtime. Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
@@ -164,6 +164,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
|
||||
/* http://www.sandpile.org/x86/cpuid.htm */
|
||||
#ifdef FLAC__HAS_X86INTRIN
|
||||
FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
|
||||
FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
|
||||
info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */
|
||||
FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
|
||||
#else
|
||||
FLAC__uint32 flags_ecx, flags_edx;
|
||||
@@ -347,6 +349,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
|
||||
{
|
||||
/* http://www.sandpile.org/x86/cpuid.htm */
|
||||
FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
|
||||
FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
|
||||
info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */
|
||||
FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
|
||||
info->x86.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
|
||||
info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
|
||||
|
||||
@@ -36,10 +36,10 @@
|
||||
data_section
|
||||
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old
|
||||
cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
|
||||
@@ -443,7 +443,7 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old
|
||||
;[esp + 16] == autoc[]
|
||||
;[esp + 12] == lag
|
||||
;[esp + 8] == data_len
|
||||
@@ -490,7 +490,7 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old
|
||||
;[esp + 16] == autoc[]
|
||||
;[esp + 12] == lag
|
||||
;[esp + 8] == data_len
|
||||
@@ -549,7 +549,7 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old
|
||||
;[esp + 16] == autoc[]
|
||||
;[esp + 12] == lag
|
||||
;[esp + 8] == data_len
|
||||
@@ -623,7 +623,7 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16
|
||||
cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old
|
||||
;[ebp + 20] == autoc[]
|
||||
;[ebp + 16] == lag
|
||||
;[ebp + 12] == data_len
|
||||
|
||||
@@ -116,6 +116,8 @@ typedef enum {
|
||||
|
||||
#if defined FLAC__CPU_IA32
|
||||
typedef struct {
|
||||
FLAC__bool intel;
|
||||
|
||||
FLAC__bool cmov;
|
||||
FLAC__bool mmx;
|
||||
FLAC__bool sse;
|
||||
@@ -131,6 +133,8 @@ typedef struct {
|
||||
} FLAC__CPUInfo_IA32;
|
||||
#elif defined FLAC__CPU_X86_64
|
||||
typedef struct {
|
||||
FLAC__bool intel;
|
||||
|
||||
FLAC__bool sse3;
|
||||
FLAC__bool ssse3;
|
||||
FLAC__bool sse41;
|
||||
|
||||
@@ -73,18 +73,22 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
|
||||
# ifdef FLAC__CPU_IA32
|
||||
# ifdef FLAC__HAS_NASM
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
# endif
|
||||
# endif
|
||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||
# ifdef FLAC__SSE_SUPPORTED
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -45,11 +45,15 @@
|
||||
|
||||
#include <xmmintrin.h> /* SSE */
|
||||
|
||||
#if 1
|
||||
/* Faster on current Intel (starting from Core i aka Nehalem) and all AMD CPUs */
|
||||
/* new routines: more unaligned loads, less shuffle
|
||||
* old routines: less unaligned loads, more shuffle
|
||||
* these *_old routines are equivalent to the ASM routines in ia32/lpc_asm.nasm
|
||||
*/
|
||||
|
||||
/* new routines: faster on current Intel (starting from Core i aka Nehalem) and all AMD CPUs */
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
int i;
|
||||
int limit = data_len - 4;
|
||||
@@ -85,7 +89,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[],
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
int i;
|
||||
int limit = data_len - 8;
|
||||
@@ -129,7 +133,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[],
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
int i;
|
||||
int limit = data_len - 12;
|
||||
@@ -181,7 +185,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[]
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
int i;
|
||||
int limit = data_len - 16;
|
||||
@@ -240,11 +244,10 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[]
|
||||
_mm_storeu_ps(autoc+12,sum3);
|
||||
}
|
||||
|
||||
#else
|
||||
/* Faster on older Intel CPUs (up to Core 2) */
|
||||
/* old routines: faster on older Intel CPUs (up to Core 2) */
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
__m128 xmm0, xmm2, xmm5;
|
||||
|
||||
@@ -281,7 +284,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[],
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6;
|
||||
|
||||
@@ -327,7 +330,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[],
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
|
||||
@@ -381,7 +384,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[]
|
||||
}
|
||||
|
||||
FLAC__SSE_TARGET("sse")
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
|
||||
{
|
||||
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9;
|
||||
|
||||
@@ -443,7 +446,6 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[]
|
||||
_mm_storeu_ps(autoc+8, xmm8);
|
||||
_mm_storeu_ps(autoc+12,xmm9);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* FLAC__SSE_SUPPORTED */
|
||||
#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
|
||||
|
||||
@@ -898,13 +898,13 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
# ifdef FLAC__HAS_NASM
|
||||
if(encoder->private_->cpuinfo.ia32.sse) {
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4;
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8;
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12;
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16;
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32;
|
||||
}
|
||||
@@ -927,16 +927,30 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
# ifdef FLAC__HAS_X86INTRIN
|
||||
# if defined FLAC__SSE_SUPPORTED
|
||||
if(encoder->private_->cpuinfo.ia32.sse) {
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
|
||||
if(encoder->private_->cpuinfo.ia32.sse42 || !encoder->private_->cpuinfo.ia32.intel) { /* use new autocorrelation functions */
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
|
||||
}
|
||||
else { /* use old autocorrelation functions */
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -977,14 +991,26 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
|
||||
# ifdef FLAC__HAS_X86INTRIN
|
||||
# ifdef FLAC__SSE_SUPPORTED
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
|
||||
if(encoder->private_->cpuinfo.x86.sse42 || !encoder->private_->cpuinfo.x86.intel) { /* use new autocorrelation functions */
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new;
|
||||
}
|
||||
else {
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef FLAC__SSE2_SUPPORTED
|
||||
|
||||
Reference in New Issue
Block a user