diff --git a/src/libFLAC/lpc_intrin_sse2.c b/src/libFLAC/lpc_intrin_sse2.c index 2902374a..ad9da79d 100644 --- a/src/libFLAC/lpc_intrin_sse2.c +++ b/src/libFLAC/lpc_intrin_sse2.c @@ -1289,6 +1289,10 @@ void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsig { int i; FLAC__int32 sum; + if (order < 8) { + FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data); + return; + } FLAC__ASSERT(order > 0); FLAC__ASSERT(order <= 32); diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c index cf06398c..cd41b5ea 100644 --- a/src/libFLAC/stream_decoder.c +++ b/src/libFLAC/stream_decoder.c @@ -417,24 +417,17 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( } #endif #ifdef FLAC__HAS_X86INTRIN -# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* not faster than asm MMX code */ +# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT: not faster than ASM/MMX code */ if(decoder->private_->cpuinfo.ia32.sse2) { decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2; decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2; } # endif -# if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */ +# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT: faster than asm; TODO: more tests */ if(decoder->private_->cpuinfo.ia32.sse41) decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41; # endif #endif -#elif defined FLAC__CPU_X86_64 -#ifdef FLAC__HAS_X86INTRIN -# if defined FLAC__SSE2_SUPPORTED - decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2; -# endif -#endif #elif defined FLAC__CPU_PPC FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC); if(decoder->private_->cpuinfo.ppc.altivec) { diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index 343da4d2..d6b10842 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -957,7 +957,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16; # endif # ifdef FLAC__SSE2_SUPPORTED - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; + /* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */ encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2; # endif # endif /* FLAC__HAS_X86INTRIN */