diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c index 7ccc526b..ddd8979c 100644 --- a/src/libFLAC/stream_decoder.c +++ b/src/libFLAC/stream_decoder.c @@ -404,7 +404,7 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( if(decoder->private_->cpuinfo.ia32.bswap) decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap; #endif - decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; + decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */ if(decoder->private_->cpuinfo.ia32.mmx) { decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx; @@ -417,13 +417,13 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( } #endif #ifdef FLAC__HAS_X86INTRIN -# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT: not faster than ASM/MMX code */ +# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not faster than ASM/MMX code */ if(decoder->private_->cpuinfo.ia32.sse2) { decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2; decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2; } # endif -# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT: faster than asm; TODO: more tests */ +# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT_SSE: faster than asm; TODO: more tests */ if(decoder->private_->cpuinfo.ia32.sse41) decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41; # endif diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index e21f3747..e64ece27 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -894,7 +894,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( # ifdef FLAC__CPU_IA32 FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32); # ifdef FLAC__HAS_NASM - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; if(encoder->private_->cpuinfo.ia32.sse) { if(encoder->protected_->max_lpc_order < 4) encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4; @@ -908,9 +907,11 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32; } else if(encoder->private_->cpuinfo.ia32._3dnow) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow; + encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow; /* obsolete instruction set */ else encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32; + + encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */ if(encoder->private_->cpuinfo.ia32.mmx) { encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx; @@ -919,11 +920,12 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; } + if(encoder->private_->cpuinfo.ia32.mmx && encoder->private_->cpuinfo.ia32.cmov) encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov; # endif /* FLAC__HAS_NASM */ # ifdef FLAC__HAS_X86INTRIN -# if defined FLAC__SSE_SUPPORTED && !defined FLAC__HAS_NASM +# if defined FLAC__SSE_SUPPORTED if(encoder->private_->cpuinfo.ia32.sse) { if(encoder->protected_->max_lpc_order < 4) encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4; @@ -940,6 +942,12 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2; } +# ifdef FLAC__SSE4_1_SUPPORTED + if(encoder->private_->cpuinfo.ia32.sse41) { + encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41; + } +# endif + # ifdef FLAC__SSSE3_SUPPORTED if (encoder->private_->cpuinfo.ia32.ssse3) { encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3; @@ -952,10 +960,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2; } # endif -# ifdef FLAC__SSE4_1_SUPPORTED - if(encoder->private_->cpuinfo.ia32.sse41) - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41; -# endif # endif /* FLAC__HAS_X86INTRIN */ # elif defined FLAC__CPU_X86_64 FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64); @@ -971,8 +975,9 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16; # endif # ifdef FLAC__SSE2_SUPPORTED - /* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */ + /* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT_SSE: not faster than C; TODO: more tests on different CPUs */ encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2; + # ifdef FLAC__SSSE3_SUPPORTED if (encoder->private_->cpuinfo.x86_64.ssse3) { encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;