Add sse2 intrinscics code for lpc_restore_signal_...()

The new functions are analogous to FLAC__lpc_restore_signal_asm_ia32_mmx.
FLAC uses them for x86-64 arch and also for ia32 if NASM is not available.

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
Erik de Castro Lopo
2014-02-02 08:55:49 +11:00
parent d163ef4567
commit 26b9546149
3 changed files with 1250 additions and 0 deletions

View File

@@ -200,6 +200,10 @@ void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], u
void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */ # endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN # if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void FLAC__lpc_restore_signal_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
# endif
# ifdef FLAC__SSE4_1_SUPPORTED # ifdef FLAC__SSE4_1_SUPPORTED
void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
# endif # endif

File diff suppressed because it is too large Load Diff

View File

@@ -417,11 +417,24 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
} }
#endif #endif
#ifdef FLAC__HAS_X86INTRIN #ifdef FLAC__HAS_X86INTRIN
# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* not faster than asm MMX code */
if(decoder->private_->cpuinfo.ia32.sse2) {
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
}
# endif
# if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */ # if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */
if(decoder->private_->cpuinfo.ia32.sse41) if(decoder->private_->cpuinfo.ia32.sse41)
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41; decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
# endif # endif
#endif #endif
#elif defined FLAC__CPU_X86_64
#ifdef FLAC__HAS_X86INTRIN
# if defined FLAC__SSE2_SUPPORTED
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
# endif
#endif
#elif defined FLAC__CPU_PPC #elif defined FLAC__CPU_PPC
FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC); FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
if(decoder->private_->cpuinfo.ppc.altivec) { if(decoder->private_->cpuinfo.ppc.altivec) {