From cf0e42ae6e8074655c7955cbff139fac1b203a23 Mon Sep 17 00:00:00 2001
From: Erik de Castro Lopo
Date: Mon, 24 Feb 2014 21:45:32 +1100
Subject: [PATCH] Don't use intrinsics when they are slower.

More thorough en-/decoding tests show that sometimes the functions
that use intrinsics are slower (or not really faster) than old plain C
functions. After this patch the encoder doesn't use these new functions
when their usefulness is questionable.

Patch-from: lvqcl
---
 src/libFLAC/lpc_intrin_sse2.c |  4 ++++
 src/libFLAC/stream_decoder.c  | 11 ++---------
 src/libFLAC/stream_encoder.c  |  2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/libFLAC/lpc_intrin_sse2.c b/src/libFLAC/lpc_intrin_sse2.c
index 2902374a..ad9da79d 100644
--- a/src/libFLAC/lpc_intrin_sse2.c
+++ b/src/libFLAC/lpc_intrin_sse2.c
@@ -1289,6 +1289,10 @@ void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsig
 {
 	int i;
 	FLAC__int32 sum;
+	if (order < 8) {
+		FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
+		return;
+	}
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c
index cf06398c..cd41b5ea 100644
--- a/src/libFLAC/stream_decoder.c
+++ b/src/libFLAC/stream_decoder.c
@@ -417,24 +417,17 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
 		}
 #endif
 #ifdef FLAC__HAS_X86INTRIN
-# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* not faster than asm MMX code */
+# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT: not faster than ASM/MMX code */
 		if(decoder->private_->cpuinfo.ia32.sse2) {
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
 			decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
 		}
 # endif
-# if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */
+# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT: faster than asm; TODO: more tests */
 		if(decoder->private_->cpuinfo.ia32.sse41)
 			decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
 # endif
 #endif
-#elif defined FLAC__CPU_X86_64
-#ifdef FLAC__HAS_X86INTRIN
-# if defined FLAC__SSE2_SUPPORTED
-		decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
-		decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
-# endif
-#endif
 #elif defined FLAC__CPU_PPC
 		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
 		if(decoder->private_->cpuinfo.ppc.altivec) {
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 343da4d2..d6b10842 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -957,7 +957,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
 # endif
 # ifdef FLAC__SSE2_SUPPORTED
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
+			/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
 # endif
 # endif /* FLAC__HAS_X86INTRIN */