From c9c6d7b545952681b309fb9c535bfdbddfd705bf Mon Sep 17 00:00:00 2001 From: chudov Date: Thu, 24 Dec 2009 16:09:20 +0000 Subject: [PATCH] optimizations --- CUETools.Codecs/BitReader.cs | 50 ++- CUETools.Codecs/BitWriter.cs | 51 ++- CUETools.Codecs/Codecs.cs | 65 ++- CUETools.Codecs/LPC.cs | 792 +++++++++++++++++++++++++---------- 4 files changed, 713 insertions(+), 245 deletions(-) diff --git a/CUETools.Codecs/BitReader.cs b/CUETools.Codecs/BitReader.cs index 0818ad8..9f0d9da 100644 --- a/CUETools.Codecs/BitReader.cs +++ b/CUETools.Codecs/BitReader.cs @@ -96,7 +96,21 @@ namespace CUETools.Codecs } } + public BitReader() + { + buffer = null; + pos = 0; + len = 0; + _bitaccumulator = 0; + cache = 0; + } + public BitReader(byte* _buffer, int _pos, int _len) + { + Reset(_buffer, _pos, _len); + } + + public void Reset(byte* _buffer, int _pos, int _len) { buffer = _buffer; pos = _pos; @@ -126,6 +140,16 @@ namespace CUETools.Codecs cache = peek4(); } + /* skip up to 16 bits */ + public void skipbits16(int bits) + { + cache <<= bits; + int new_accumulator = (_bitaccumulator + bits); + pos += (new_accumulator >> 3); + _bitaccumulator = (new_accumulator & 7); + cache |= ((((uint)buffer[pos + 2] << 8) + (uint)buffer[pos + 3]) << _bitaccumulator); + } + /* skip up to 8 bits */ public void skipbits8(int bits) { @@ -288,6 +312,7 @@ namespace CUETools.Codecs { fixed (byte* unary_table = byte_to_unary_table) { + uint mask = (1U << k) - 1; if (k == 0) for (int i = n; i > 0; i--) *(r++) = read_unary_signed(); @@ -303,9 +328,26 @@ namespace CUETools.Codecs bits = unary_table[cache >> 24]; msbs += bits; } - skipbits8((int)(msbs & 7) + 1); - uint uval = (msbs << k) | (cache >> (32 - k)); - skipbits8(k); + int btsk = k + (int)bits + 1; + uint uval = (msbs << k) | ((cache >> (32 - btsk)) & mask); + skipbits16(btsk); + *(r++) = (int)(uval >> 1 ^ -(int)(uval & 1)); + } + else if (k <= 16) + for (int i = n; i > 0; i--) + { + //*(r++) = read_rice_signed((int)k); + uint bits = unary_table[cache >> 24]; + uint msbs = bits; + while (bits == 8) + { + skipbits8(8); + bits = unary_table[cache >> 24]; + msbs += bits; + } + int btsk = k + (int)bits + 1; + uint uval = (msbs << k) | ((cache >> (32 - btsk)) & mask); + skipbits(btsk); *(r++) = (int)(uval >> 1 ^ -(int)(uval & 1)); } else @@ -321,7 +363,7 @@ namespace CUETools.Codecs msbs += bits; } skipbits8((int)(msbs & 7) + 1); - uint uval = (msbs << k) | (cache >> (32 - k)); + uint uval = (msbs << k) | ((cache >> (32 - k))); skipbits(k); *(r++) = (int)(uval >> 1 ^ -(int)(uval & 1)); } diff --git a/CUETools.Codecs/BitWriter.cs b/CUETools.Codecs/BitWriter.cs index 85870c9..69da68d 100644 --- a/CUETools.Codecs/BitWriter.cs +++ b/CUETools.Codecs/BitWriter.cs @@ -42,6 +42,14 @@ namespace CUETools.Codecs eof = false; } + public void Reset() + { + buf_ptr = buf_start; + bit_left = 32; + bit_buf = 0; + eof = false; + } + public void writebytes(int bytes, byte c) { for (; bytes > 0; bytes--) @@ -133,6 +141,13 @@ namespace CUETools.Codecs /// unsafe void writebits_fast(int bits, uint val, ref byte * buf) { +#if DEBUG + if ((buf_ptr + 3) >= buf_end) + { + eof = true; + return; + } +#endif if (bits < bit_left) { bit_buf = (bit_buf << bits) | val; @@ -209,21 +224,29 @@ namespace CUETools.Codecs writebits(k + q, (v & ((1 << k) - 1)) | (1 << k)); } - public unsafe void write_rice_block_signed(int k, int* residual, int count) + public unsafe void write_rice_block_signed(byte * fixedbuf, int k, int* residual, int count) { - fixed (byte* fixbuf = &buffer[buf_ptr]) + byte* buf = &fixedbuf[buf_ptr]; + //fixed (byte* fixbuf = &buffer[buf_ptr]) { - byte* buf = fixbuf; + //byte* buf = fixbuf; for (int i = count; i > 0; i--) { - int v = -2 * *(residual++) - 1; - v ^= (v >> 31); + int v = *(residual++); + v = (v << 1) ^ (v >> 31); // write quotient in unary int q = (v >> k) + 1; int bits = k + q; while (bits > 31) { +#if DEBUG + if (buf + 3 >= fixedbuf + buf_end) + { + eof = true; + return; + } +#endif int b = Math.Min(bits - 31, 31); if (b < bit_left) { @@ -243,6 +266,14 @@ namespace CUETools.Codecs bits -= b; } +#if DEBUG + if (buf + 3 >= fixedbuf + buf_end) + { + eof = true; + return; + } +#endif + // write remainder in binary using 'k' bits //writebits_fast(k + q, (uint)((v & ((1 << k) - 1)) | (1 << k)), ref buf); uint val = (uint)((v & ((1 << k) - 1)) | (1 << k)); @@ -262,7 +293,7 @@ namespace CUETools.Codecs *(buf++) = (byte)(bb); } } - buf_ptr += (int)(buf - fixbuf); + buf_ptr = (int)(buf - fixedbuf); } } @@ -286,6 +317,14 @@ namespace CUETools.Codecs bit_buf = 0; } + public byte[] Buffer + { + get + { + return buffer; + } + } + public int Length { get diff --git a/CUETools.Codecs/Codecs.cs b/CUETools.Codecs/Codecs.cs index 6c518c1..b3bfd15 100644 --- a/CUETools.Codecs/Codecs.cs +++ b/CUETools.Codecs/Codecs.cs @@ -223,12 +223,24 @@ namespace CUETools.Codecs return false; } + unsafe public static void MemCpy(uint* res, uint* smp, int n) + { + for (int i = n; i > 0; i--) + *(res++) = *(smp++); + } + unsafe public static void MemCpy(int* res, int* smp, int n) { for (int i = n; i > 0; i--) *(res++) = *(smp++); } + unsafe public static void MemCpy(short* res, short* smp, int n) + { + for (int i = n; i > 0; i--) + *(res++) = *(smp++); + } + unsafe public static void MemCpy(byte* res, byte* smp, int n) { for (int i = n; i > 0; i--) @@ -383,8 +395,8 @@ namespace CUETools.Codecs { Stream _IO; BinaryReader _br; - ulong _dataOffset, _dataLen; - ulong _samplePos, _sampleLen; + ulong _dataOffset, _samplePos, _sampleLen; + long _dataLen; int _bitsPerSample, _channelCount, _sampleRate, _blockAlign; bool _largeFile; string _path; @@ -398,7 +410,23 @@ namespace CUETools.Codecs ParseHeaders(); - _sampleLen = _dataLen / (uint)_blockAlign; + if (_dataLen < 0) + //_sampleLen = 0; + throw new Exception("WAVE stream length unknown"); + else + _sampleLen = (ulong)(_dataLen / _blockAlign); + } + + public WAVReader(Stream IO) + { + _path = ""; + _IO = IO; + _br = new BinaryReader(_IO); + ParseHeaders(); + if (_dataLen < 0) + _sampleLen = 0; + else + _sampleLen = (ulong)(_dataLen / _blockAlign); } public void Close() @@ -473,14 +501,15 @@ namespace CUETools.Codecs _dataOffset = (ulong)pos; if (!_IO.CanSeek || _IO.Length <= maxFileSize) { - if (ckSize == 0x7fffffff) - throw new Exception("WAVE stream length unknown"); - _dataLen = ckSize; + if (ckSize >= 0x7fffffff) + _dataLen = -1; + else + _dataLen = (long)ckSize; } else { _largeFile = true; - _dataLen = ((ulong)_IO.Length) - _dataOffset; + _dataLen = _IO.Length - pos; } } @@ -517,14 +546,10 @@ namespace CUETools.Codecs { ulong seekPos; - if (value > _sampleLen) - { + if (_sampleLen != 0 && value > _sampleLen) _samplePos = _sampleLen; - } else - { _samplePos = value; - } seekPos = _dataOffset + (_samplePos * (uint)_blockAlign); _IO.Seek((long)seekPos, SeekOrigin.Begin); @@ -581,7 +606,7 @@ namespace CUETools.Codecs public uint Read(int[,] buff, uint sampleCount) { - if (sampleCount > Remaining) + if (_sampleLen > 0 && sampleCount > Remaining) sampleCount = (uint)Remaining; if (sampleCount == 0) @@ -594,7 +619,13 @@ namespace CUETools.Codecs { int len = _IO.Read(_sampleBuffer, pos, (int)byteCount - pos); if (len <= 0) - throw new Exception("Incomplete file read."); + { + if ((pos % BlockAlign) != 0 || _sampleLen > 0) + throw new Exception("Incomplete file read."); + sampleCount = (uint)(pos / BlockAlign); + _sampleLen = _samplePos + sampleCount; + break; + } pos += len; } while (pos < byteCount); AudioSamples.BytesToFLACSamples(_sampleBuffer, 0, buff, 0, @@ -1516,7 +1547,7 @@ namespace CUETools.Codecs // public bool ReadSource(IAudioSource input) - public void Write(int[,] samples, int offset, int count) + public unsafe void Write(int[,] samples, int offset, int count) { int pos, chunk; while (count > 0) @@ -1535,7 +1566,9 @@ namespace CUETools.Codecs chunk = Math.Min(FreeSpace, _size - pos); chunk = Math.Min(chunk, count); } - Array.Copy(samples, offset * Channels, _buffer, pos * Channels, chunk * Channels); + fixed (int* src = &samples[offset, 0], dst = &_buffer[pos, 0]) + AudioSamples.MemCpy(dst, src, chunk * Channels); + //Array.Copy(samples, offset * Channels, _buffer, pos * Channels, chunk * Channels); lock (this) { _end += chunk; diff --git a/CUETools.Codecs/LPC.cs b/CUETools.Codecs/LPC.cs index e39419a..f8a4a8a 100644 --- a/CUETools.Codecs/LPC.cs +++ b/CUETools.Codecs/LPC.cs @@ -26,77 +26,147 @@ namespace CUETools.Codecs public class lpc { public const int MAX_LPC_ORDER = 32; - public const int MAX_LPC_WINDOWS = 4; + public const int MAX_LPC_WINDOWS = 2; public const int MAX_LPC_PRECISIONS = 4; - /** - * Apply Welch window function to audio block - */ - static unsafe void - apply_welch_window(/*const*/int* data, uint len, double* w_data) + public unsafe static void window_welch(float* window, int L) { - double c = (2.0 / (len - 1.0)) - 1.0; - for (uint i = 0; i < (len >> 1); i++) + int N = L - 1; + double N2 = (double)N / 2.0; + + for (int n = 0; n <= N; n++) { - double w = 1.0 - ((c - i) * (c - i)); - w_data[i] = data[i] * w; - w_data[len - 1 - i] = data[len - 1 - i] * w; + double k = ((double)n - N2) / N2; + k = 1.0 - k * k; + window[n] = (float)(k); + } + } + + public unsafe static void window_bartlett(float* window, int L) + { + int N = L - 1; + double N2 = (double)N / 2.0; + for (int n = 0; n <= N; n++) + { + double k = ((double)n - N2) / N2; + k = 1.0 - k * k; + window[n] = (float)(k * k); + } + } + + public unsafe static void window_rectangle(float* window, int L) + { + for (int n = 0; n < L; n++) + window[n] = 1.0F; + } + + public unsafe static void window_flattop(float* window, int L) + { + int N = L - 1; + for (int n = 0; n < L; n++) + window[n] = (float)(1.0 - 1.93 * Math.Cos(2.0 * Math.PI * n / N) + 1.29 * Math.Cos(4.0 * Math.PI * n / N) - 0.388 * Math.Cos(6.0 * Math.PI * n / N) + 0.0322 * Math.Cos(8.0 * Math.PI * n / N)); + } + + public unsafe static void window_tukey(float* window, int L) + { + window_rectangle(window, L); + double p = 0.5; + int Np = (int)(p / 2.0 * L) - 1; + if (Np > 0) + { + for (int n = 0; n <= Np; n++) + { + window[n] = (float)(0.5 - 0.5 * Math.Cos(Math.PI * n / Np)); + window[L - Np - 1 + n] = (float)(0.5 - 0.5 * Math.Cos(Math.PI * (n + Np) / Np)); + } + } + } + + public unsafe static void window_hann(float* window, int L) + { + int N = L - 1; + for (int n = 0; n < L; n++) + window[n] = (float)(0.5 - 0.5 * Math.Cos(2.0 * Math.PI * n / N)); + } + + private static short sign_only(int val) + { + return (short)((val >> 31) + ((val - 1) >> 31) + 1); + } + + static public unsafe void + compute_corr_int(/*const*/ short* data1, short* data2, int len, int min, int lag, int* autoc) + { + for (int i = min; i <= lag; ++i) + { + int temp = 0; + int temp2 = 0; + + for (int j = 0; j <= lag - i; ++j) + temp += data1[j + i] * data2[j]; + + for (int j = lag + 1 - i; j < len - i; j += 2) + { + temp += data1[j + i] * data2[j]; + temp2 += data1[j + i + 1] * data2[j + 1]; + } + autoc[i] = temp + temp2; } } /** * Calculates autocorrelation data from audio samples - * A Welch window function is applied before calculation. + * A window function is applied before calculation. */ static public unsafe void - compute_autocorr(/*const*/ int* data, uint len, uint min, uint lag, double* autoc, double* window) + compute_autocorr(/*const*/ int* data, int len, int min, int lag, double* autoc, float* window) { - double* data1 = stackalloc double[(int)len + 16]; - uint i, j; - double temp, temp2; +#if FPAC + short* data1 = stackalloc short[len + 1]; + short* data2 = stackalloc short[len + 1]; + int* c1 = stackalloc int[lpc.MAX_LPC_ORDER + 1]; + int* c2 = stackalloc int[lpc.MAX_LPC_ORDER + 1]; + int* c3 = stackalloc int[lpc.MAX_LPC_ORDER + 1]; + int* c4 = stackalloc int[lpc.MAX_LPC_ORDER + 1]; - if (window == null) - apply_welch_window(data, len, data1); - else + for (int i = 0; i < len; i++) { - for (i = 0; i < len; i++) - data1[i] = data[i] * window[i]; + int val = (int)(data[i] * window[i]); + data1[i] = (short)(sign_only(val) * (Math.Abs(val) >> 9)); + data2[i] = (short)(sign_only(val) * (Math.Abs(val) & 0x1ff)); } data1[len] = 0; + data2[len] = 0; + + compute_corr_int(data1, data1, len, min, lag, c1); + compute_corr_int(data1, data2, len, min, lag, c2); + compute_corr_int(data2, data1, len, min, lag, c3); + compute_corr_int(data2, data2, len, min, lag, c4); + + for (int coeff = min; coeff <= lag; coeff++) + autoc[coeff] = (c1[coeff] * (double)(1 << 18) + (c2[coeff] + c3[coeff]) * (double)(1 << 9) + c4[coeff]); +#else + double* data1 = stackalloc double[(int)len + 16]; + int i; + + for (i = 0; i < len; i++) + data1[i] = data[i] * window[i]; + data1[len] = 0; for (i = min; i <= lag; ++i) { - temp = 1.0; - temp2 = 1.0; - for (j = 0; j <= lag - i; ++j) - temp += data1[j + i] * data1[j]; - + double temp = 1.0; + double temp2 = 1.0; double* finish = data1 + len - i; - for (double* pdata = data1 + lag + 1 - i; pdata < finish; pdata += 2) + + for (double* pdata = data1; pdata < finish; pdata += 2) { temp += pdata[i] * pdata[0]; temp2 += pdata[i + 1] * pdata[1]; } autoc[i] = temp + temp2; } - - //int sample, coeff; - //for (coeff = 0; coeff <= lag; coeff++) - // autoc[coeff] = 0.0; - //int data_len = (int)len; - //int limit = data_len - (int)lag - 1; - //for (sample = 0; sample <= limit; sample++) - //{ - // double d = data1[sample]; - // for (coeff = 0; coeff <= lag; coeff++) - // autoc[coeff] += d * data1[sample + coeff]; - //} - //for (; sample < data_len; sample++) - //{ - // double d = data1[sample]; - // for (coeff = 0; coeff < data_len - sample; coeff++) - // autoc[coeff] += d * data1[sample + coeff]; - //} +#endif } /** @@ -104,64 +174,39 @@ namespace CUETools.Codecs * Produces LPC coefficients from autocorrelation data. */ public static unsafe void - compute_lpc_coefs(/*const*/ double* autoc, uint max_order, double* reff, - double* lpc/*[][MAX_LPC_ORDER]*/) + compute_lpc_coefs(uint max_order, double* reff, float* lpc/*[][MAX_LPC_ORDER]*/) { double* lpc_tmp = stackalloc double[MAX_LPC_ORDER]; - int i, j, i2; - double r, err, tmp; - if (max_order > MAX_LPC_ORDER) throw new Exception("wierd"); - for (i = 0; i < max_order; i++) + for (int i = 0; i < max_order; i++) lpc_tmp[i] = 0; - err = 1.0; - if (autoc != null) - err = autoc[0]; - - for (i = 0; i < max_order; i++) + for (int i = 0; i < max_order; i++) { - if (reff != null) - { - r = reff[i]; - } - else - { - r = -autoc[i + 1]; - for (j = 0; j < i; j++) - { - r -= lpc_tmp[j] * autoc[i - j]; - } - r /= err; - err *= 1.0 - (r * r); - } - - i2 = (i >> 1); + double r = reff[i]; + int i2 = (i >> 1); lpc_tmp[i] = r; - for (j = 0; j < i2; j++) + for (int j = 0; j < i2; j++) { - tmp = lpc_tmp[j]; + double tmp = lpc_tmp[j]; lpc_tmp[j] += r * lpc_tmp[i - 1 - j]; lpc_tmp[i - 1 - j] += r * tmp; } - if (0 != (i & 1)) - { - lpc_tmp[j] += lpc_tmp[j] * r; - } - for (j = 0; j <= i; j++) - { - lpc[i * MAX_LPC_ORDER + j] = -lpc_tmp[j]; - } + if (0 != (i & 1)) + lpc_tmp[i2] += lpc_tmp[i2] * r; + + for (int j = 0; j <= i; j++) + lpc[i * MAX_LPC_ORDER + j] = (float)-lpc_tmp[j]; } } public static unsafe void compute_schur_reflection(/*const*/ double* autoc, uint max_order, - double* reff/*[][MAX_LPC_ORDER]*/) + double* reff/*[][MAX_LPC_ORDER]*/, double * err) { double* gen0 = stackalloc double[MAX_LPC_ORDER]; double* gen1 = stackalloc double[MAX_LPC_ORDER]; @@ -173,6 +218,7 @@ namespace CUETools.Codecs double error = autoc[0]; reff[0] = -gen1[0] / error; error += gen1[0] * reff[0]; + err[0] = error; for (uint i = 1; i < max_order; i++) { for (uint j = 0; j < max_order - i; j++) @@ -182,49 +228,19 @@ namespace CUETools.Codecs } reff[i] = -gen1[0] / error; error += gen1[0] * reff[i]; + err[i] = error; } } - /** - * Compute LPC coefs for Flake.OrderMethod._EST - * Faster LPC coeff computation by first calculating the reflection coefficients - * using Schur recursion. That allows for estimating the optimal order before - * running Levinson recursion. - */ - public static unsafe uint - compute_lpc_coefs_est(/*const*/ double* autoc, uint max_order, - double* lpc/*[][MAX_LPC_ORDER]*/) - { - double* reff = stackalloc double[MAX_LPC_ORDER]; - - // Schur recursion - compute_schur_reflection(autoc, max_order, reff); - - // Estimate optimal order using reflection coefficients - uint order_est = 1; - for (int i = (int)max_order - 1; i >= 0; i--) - { - if (Math.Abs(reff[i]) > 0.10) - { - order_est = (uint)i + 1; - break; - } - } - - // Levinson recursion - compute_lpc_coefs(null, order_est, reff, lpc); - return order_est; - } - /** * Quantize LPC coefficients */ public static unsafe void - quantize_lpc_coefs(double* lpc_in, int order, uint precision, int* lpc_out, + quantize_lpc_coefs(float* lpc_in, int order, uint precision, int* lpc_out, out int shift, int max_shift, int zero_shift) { int i; - double d, cmax, error; + float d, cmax, error; int qmax; int sh, q; @@ -232,7 +248,7 @@ namespace CUETools.Codecs qmax = (1 << ((int)precision - 1)) - 1; // find maximum coefficient value - cmax = 0.0; + cmax = 0.0F; for (i = 0; i < order; i++) { d = Math.Abs(lpc_in[i]); @@ -259,7 +275,7 @@ namespace CUETools.Codecs // coefficients instead if (sh == 0 && cmax > qmax) { - double scale = ((double)qmax) / cmax; + float scale = ((float)qmax) / cmax; for (i = 0; i < order; i++) { lpc_in[i] *= scale; @@ -311,9 +327,8 @@ namespace CUETools.Codecs case 3: for (int i = n - order; i > 0; i--) { - int pred = coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int pred = coefs[2] * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 2; } @@ -321,10 +336,10 @@ namespace CUETools.Codecs case 4: for (int i = n - order; i > 0; i--) { - int pred = coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 3; } @@ -332,11 +347,11 @@ namespace CUETools.Codecs case 5: for (int i = n - order; i > 0; i--) { - int pred = coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 4; } @@ -344,12 +359,11 @@ namespace CUETools.Codecs case 6: for (int i = n - order; i > 0; i--) { - int pred = coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 5; } @@ -357,13 +371,12 @@ namespace CUETools.Codecs case 7: for (int i = n - order; i > 0; i--) { - int pred = coefs[6] * *(s++); - pred += coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 6; } @@ -371,35 +384,90 @@ namespace CUETools.Codecs case 8: for (int i = n - order; i > 0; i--) { - int pred = coefs[7] * *(s++); - pred += coefs[6] * *(s++); - pred += coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); s -= 7; } break; + case 9: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 8; + } + break; + case 10: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 9; + } + break; + case 11: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 10; + } + break; + case 12: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 11; + } + break; default: for (int i = order; i < n; i++) { s = smp + i - order; int pred = 0; - int* co = coefs + order - 1; - int* c7 = coefs + 7; - while (co > c7) - pred += *(co--) * *(s++); - pred += coefs[7] * *(s++); - pred += coefs[6] * *(s++); - pred += coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* c = coefs + order - 1; + int* c11 = coefs + 11; + while (c > c11) + pred += *(c--) * *(s++); + pred += + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *(r++) = *s - (pred >> shift); } break; @@ -532,6 +600,181 @@ namespace CUETools.Codecs } } + public static unsafe void + encode_residual2(int* res, int* smp, int n, int order, + int* coefs, int shift) + { + int* s = smp; + int* r = res; + int c0 = coefs[0]; + int c1 = coefs[1]; + switch (order) + { + case 1: + for (int i = n - order; i > 0; i--) + { + int pred = c0 * *(s++); + *(r++) = *s - (pred >> shift); + } + break; + case 2: + for (int i = n - order; i > 0; i--) + { + int pred = c1 * *(s++); + pred += c0 * *(s++); + *(r++) = *(s--) - (pred >> shift); + } + break; + case 3: + for (int i = n - order; i > 0; i--) + { + int pred = coefs[2] * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 2; + } + break; + case 4: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 3; + } + break; + case 5: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 4; + } + break; + case 6: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 5; + } + break; + case 7: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 6; + } + break; + case 8: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 7; + } + break; + case 9: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 8; + } + break; + case 10: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 9; + } + break; + case 11: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 10; + } + break; + case 12: + for (int i = n - order; i > 0; i--) + { + int* c = coefs + order - 1; + int pred = + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + s -= 11; + } + break; + default: + for (int i = order; i < n; i++) + { + s = smp + i - order; + int pred = 0; + int* c = coefs + order - 1; + int* c11 = coefs + 11; + while (c > c11) + pred += *(c--) * *(s++); + pred += + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + *(c--) * *(s++) + *(c--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *(r++) = *s - (pred >> shift); + } + break; + } + } + public static unsafe void decode_residual(int* res, int* smp, int n, int order, int* coefs, int shift) @@ -555,17 +798,17 @@ namespace CUETools.Codecs case 2: for (int i = n - order; i > 0; i--) { - int pred = c1 * *(s++); - pred += c0 * *(s++); + int pred = c1 * *(s++) + c0 * *(s++); *(s--) = *(r++) + (pred >> shift); } break; case 3: for (int i = n - order; i > 0; i--) { - int pred = coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 2; } @@ -573,10 +816,10 @@ namespace CUETools.Codecs case 4: for (int i = n - order; i > 0; i--) { - int pred = coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 3; } @@ -584,11 +827,11 @@ namespace CUETools.Codecs case 5: for (int i = n - order; i > 0; i--) { - int pred = coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 4; } @@ -596,12 +839,11 @@ namespace CUETools.Codecs case 6: for (int i = n - order; i > 0; i--) { - int pred = coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 5; } @@ -609,13 +851,12 @@ namespace CUETools.Codecs case 7: for (int i = n - order; i > 0; i--) { - int pred = coefs[6] * *(s++); - pred += coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 6; } @@ -623,18 +864,74 @@ namespace CUETools.Codecs case 8: for (int i = n - order; i > 0; i--) { - int pred = coefs[7] * *(s++); - pred += coefs[6] * *(s++); - pred += coefs[5] * *(s++); - pred += coefs[4] * *(s++); - pred += coefs[3] * *(s++); - pred += coefs[2] * *(s++); - pred += c1 * *(s++); - pred += c0 * *(s++); + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); *s = *(r++) + (pred >> shift); s -= 7; } break; + case 9: + for (int i = n - order; i > 0; i--) + { + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *s = *(r++) + (pred >> shift); + s -= 8; + } + break; + case 10: + for (int i = n - order; i > 0; i--) + { + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *s = *(r++) + (pred >> shift); + s -= 9; + } + break; + case 11: + for (int i = n - order; i > 0; i--) + { + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *s = *(r++) + (pred >> shift); + s -= 10; + } + break; + case 12: + for (int i = n - order; i > 0; i--) + { + int* co = coefs + order - 1; + int pred = + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + *(co--) * *(s++) + *(co--) * *(s++) + + c1 * *(s++) + c0 * *(s++); + *s = *(r++) + (pred >> shift); + s -= 11; + } + break; default: for (int i = order; i < n; i++) { @@ -791,8 +1088,11 @@ namespace CUETools.Codecs { public LpcContext() { + coefs = new int[lpc.MAX_LPC_ORDER]; reflection_coeffs = new double[lpc.MAX_LPC_ORDER]; + prediction_error = new double[lpc.MAX_LPC_ORDER]; autocorr_values = new double[lpc.MAX_LPC_ORDER + 1]; + best_orders = new int[lpc.MAX_LPC_ORDER]; done_lpcs = new uint[lpc.MAX_LPC_PRECISIONS]; } @@ -815,44 +1115,99 @@ namespace CUETools.Codecs /// Samples pointer /// Block size /// Window function - public void GetReflection(int order, int* samples, int blocksize, double* window) + public void GetReflection(int order, int* samples, int blocksize, float* window) { if (autocorr_order > order) return; - fixed (double* reff = reflection_coeffs, autoc = autocorr_values) + fixed (double* reff = reflection_coeffs, autoc = autocorr_values, err = prediction_error) { - lpc.compute_autocorr(samples, (uint)blocksize, (uint)autocorr_order, (uint)order, autoc, window); - lpc.compute_schur_reflection(autoc, (uint)order, reff); + lpc.compute_autocorr(samples, blocksize, autocorr_order, order, autoc, window); + lpc.compute_schur_reflection(autoc, (uint)order, reff, err); autocorr_order = order + 1; } } + public void GetReflection1(int order, int* samples, int blocksize, float* window) + { + if (autocorr_order > order) + return; + fixed (double* reff = reflection_coeffs, autoc = autocorr_values, err = prediction_error) + { + lpc.compute_autocorr(samples, blocksize, 0, order + 1, autoc, window); + for (int i = 1; i <= order; i++) + autoc[i] = autoc[i + 1]; + lpc.compute_schur_reflection(autoc, (uint)order, reff, err); + autocorr_order = order + 1; + } + } + + public void ComputeReflection(int order, float* autocorr) + { + fixed (double* reff = reflection_coeffs, autoc = autocorr_values, err = prediction_error) + { + for (int i = 0; i <= order; i++) + autoc[i] = autocorr[i]; + lpc.compute_schur_reflection(autoc, (uint)order, reff, err); + autocorr_order = order + 1; + } + } + + public void ComputeReflection(int order, double* autocorr) + { + fixed (double* reff = reflection_coeffs, autoc = autocorr_values, err = prediction_error) + { + for (int i = 0; i <= order; i++) + autoc[i] = autocorr[i]; + lpc.compute_schur_reflection(autoc, (uint)order, reff, err); + autocorr_order = order + 1; + } + } + + public double Akaike(int blocksize, int order, double alpha, double beta) + { + //return (blocksize - order) * (Math.Log(prediction_error[order - 1]) - Math.Log(1.0)) + Math.Log(blocksize) * order * (alpha + beta * order); + return blocksize * Math.Log(prediction_error[order - 1]) + Math.Log(blocksize) * order * (alpha + beta * order); + } + + /// + /// Sorts orders based on Akaike's criteria + /// + /// Frame size + public void SortOrdersAkaike(int blocksize, int count, int max_order, double alpha, double beta) + { + for (int i = 0; i < max_order; i++) + best_orders[i] = i + 1; + for (int i = 0; i < max_order && i < count; i++) + { + for (int j = i + 1; j < max_order; j++) + { + if (Akaike(blocksize, best_orders[j], alpha, beta) < Akaike(blocksize, best_orders[i], alpha, beta)) + { + int tmp = best_orders[j]; + best_orders[j] = best_orders[i]; + best_orders[i] = tmp; + } + } + } + } + /// /// Produces LPC coefficients from autocorrelation data. /// /// LPC coefficients buffer (for all orders) - public void ComputeLPC(double* lpcs) + public void ComputeLPC(float* lpcs) { fixed (double* reff = reflection_coeffs) - lpc.compute_lpc_coefs(null, (uint)autocorr_order - 1, reff, lpcs); + lpc.compute_lpc_coefs((uint)autocorr_order - 1, reff, lpcs); } - /// - /// Checks if an order is 'interesting'. - /// Lower orders are likely to be useful if next two reflection coefficients are small, - /// Higher orders are likely to be useful if reflection coefficient for that order is greater than 0.1 and the next one is not. - /// - /// - /// - public bool IsInterestingOrder(int order) - { - return (order > 4 && Math.Abs(reflection_coeffs[order - 1]) >= 0.10 && (order == autocorr_order - 1 || Math.Abs(reflection_coeffs[order]) < 0.10)) || - (order < 6 && order < autocorr_order - 2 && reflection_coeffs[order + 1] * reflection_coeffs[order + 1] + reflection_coeffs[order] * reflection_coeffs[order] < 0.1); - } - - double[] autocorr_values; + public double[] autocorr_values; double[] reflection_coeffs; + public double[] prediction_error; + public int[] best_orders; + public int[] coefs; int autocorr_order; + public int shift; public double[] Reflection { @@ -864,5 +1219,4 @@ namespace CUETools.Codecs public uint[] done_lpcs; } - }