mirror of
https://github.com/claunia/flac.git
synced 2025-12-16 18:54:26 +00:00
Adds SSE-accelerated LPC functions.
New functions are:
FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4()
FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8()
FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12()
FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16()
FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
@@ -915,7 +915,21 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
if(encoder->private_->cpuinfo.data.ia32.mmx && encoder->private_->cpuinfo.data.ia32.cmov)
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
|
||||
# endif /* FLAC__HAS_NASM */
|
||||
# endif /* FLAC__CPU_IA32 */
|
||||
# elif defined FLAC__CPU_X86_64
|
||||
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
|
||||
# ifdef FLAC__HAS_X86INTRIN
|
||||
if(encoder->protected_->max_lpc_order < 4)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
|
||||
else if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8;
|
||||
else if(encoder->protected_->max_lpc_order < 12)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
|
||||
else if(encoder->protected_->max_lpc_order < 16)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
|
||||
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||
# endif /* FLAC__HAS_X86INTRIN */
|
||||
# endif /* FLAC__CPU_... */
|
||||
}
|
||||
# endif /* !FLAC__NO_ASM */
|
||||
#endif /* !FLAC__INTEGER_ONLY_LIBRARY */
|
||||
@@ -2238,8 +2252,8 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
|
||||
|
||||
ok = true;
|
||||
|
||||
/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx()
|
||||
* requires that the input arrays (in our case the integer signals)
|
||||
/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
|
||||
* require that the input arrays (in our case the integer signals)
|
||||
* have a buffer of up to 3 zeroes in front (at negative indices) for
|
||||
* alignment purposes; we use 4 in front to keep the data well-aligned.
|
||||
*/
|
||||
@@ -3167,7 +3181,7 @@ FLAC__bool process_subframe_(
|
||||
#endif
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
FLAC__double lpc_residual_bits_per_sample;
|
||||
FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm routines need all the space */
|
||||
FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm and x86 intrinsic routines need all the space */
|
||||
FLAC__double lpc_error[FLAC__MAX_LPC_ORDER];
|
||||
unsigned min_lpc_order, max_lpc_order, lpc_order;
|
||||
unsigned min_qlp_coeff_precision, max_qlp_coeff_precision, qlp_coeff_precision;
|
||||
@@ -3558,7 +3572,7 @@ unsigned evaluate_lpc_subframe_(
|
||||
FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
|
||||
)
|
||||
{
|
||||
FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER];
|
||||
FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER]; /* WATCHOUT: the size is important; x86 intrinsic routines need more than 'order' elements */
|
||||
unsigned i, residual_bits, estimate;
|
||||
int quantization, ret;
|
||||
const unsigned residual_samples = blocksize - order;
|
||||
|
||||
Reference in New Issue
Block a user