Add intrinsics version of two lpc functions.

Functions:
- FLAC__fixed_compute_best_predictor
- FLAC__fixed_compute_best_predictor_wide

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
Erik de Castro Lopo
2014-04-11 06:21:09 +10:00
parent d456cdd28a
commit 93f6109c90
6 changed files with 57 additions and 5 deletions

View File

@@ -348,8 +348,10 @@ typedef struct FLAC__StreamEncoderPrivate {
void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
#ifndef FLAC__INTEGER_ONLY_LIBRARY
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
#else
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
#endif
#ifndef FLAC__INTEGER_ONLY_LIBRARY
void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
@@ -879,6 +881,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
#endif
encoder->private_->local_precompute_partition_info_sums = precompute_partition_info_sums_;
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide;
#ifndef FLAC__INTEGER_ONLY_LIBRARY
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide;
@@ -937,6 +940,17 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
}
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.ia32.ssse3) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
}
else
# endif
if (encoder->private_->cpuinfo.ia32.sse2) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
}
# endif
# ifdef FLAC__SSE4_1_SUPPORTED
if(encoder->private_->cpuinfo.ia32.sse41)
@@ -959,6 +973,15 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
# ifdef FLAC__SSE2_SUPPORTED
/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.x86_64.ssse3) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
}
else
# endif
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
# endif
# endif /* FLAC__HAS_X86INTRIN */
# endif /* FLAC__CPU_... */
@@ -991,7 +1014,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
/* finally override based on wide-ness if necessary */
if(encoder->private_->use_wide_by_block) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_wide;
encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide;
}
/* set state to OK; from here on, errors are fatal and we'll override the state then */