optimizations

This commit is contained in:
chudov
2009-09-12 13:50:25 +00:00
parent f09a46e04d
commit 75c39b0654

View File

@@ -239,7 +239,7 @@ namespace CUETools.Codecs.FlaCuda
{ {
DoClose(); DoClose();
if (sample_count != 0 && _position != sample_count) if (sample_count != 0 && _position != sample_count)
throw new Exception("Samples written differs from the expected sample count."); throw new Exception(string.Format("Samples written differs from the expected sample count. Expected {0}, got {1}.", sample_count, _position));
} }
public void Delete() public void Delete()
@@ -936,6 +936,10 @@ namespace CUETools.Codecs.FlaCuda
residualTasks[nResidualTasks].shift = 0; residualTasks[nResidualTasks].shift = 0;
switch (order) switch (order)
{ {
case 5:
residualTasks[nResidualTasks].residualOrder = 1;
residualTasks[nResidualTasks].coefs[0] = 0;
break;
case 1: case 1:
residualTasks[nResidualTasks].coefs[0] = 1; residualTasks[nResidualTasks].coefs[0] = 1;
break; break;
@@ -1056,17 +1060,18 @@ namespace CUETools.Codecs.FlaCuda
// FIXED // FIXED
for (int ch = 0; ch < channelsCount; ch++) for (int ch = 0; ch < channelsCount; ch++)
{ {
for (int order = 1; order <= 4 && order < frame.blocksize; order++) for (int order = 1; order <= 5 && order < frame.blocksize; order++)
{ {
int index = (order - 1) + 8 * ch + ((max_order + 7) & ~7) * _windowcount * channelsCount; int index = (order - 1) + 8 * ch + ((max_order + 7) & ~7) * _windowcount * channelsCount;
int nbits = order * (int)frame.subframes[ch].obits + 6 + residualTasks[index].size; int forder = order == 5 ? 0 : order;
if (residualTasks[index].residualOrder != order) int nbits = forder * (int)frame.subframes[ch].obits + 6 + residualTasks[index].size;
if (residualTasks[index].residualOrder != (order == 5 ? 1 : order))
throw new Exception("oops"); throw new Exception("oops");
if (frame.subframes[ch].best.size > nbits) if (frame.subframes[ch].best.size > nbits)
{ {
frame.subframes[ch].best.type = SubframeType.Fixed; frame.subframes[ch].best.type = SubframeType.Fixed;
frame.subframes[ch].best.size = (uint)nbits; frame.subframes[ch].best.size = (uint)nbits;
frame.subframes[ch].best.order = order; frame.subframes[ch].best.order = forder;
} }
} }
} }
@@ -1155,7 +1160,7 @@ namespace CUETools.Codecs.FlaCuda
cuda.CopyHostToDeviceAsync(cudaSamples, samplesBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelsCount), cudaStream); cuda.CopyHostToDeviceAsync(cudaSamples, samplesBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelsCount), cudaStream);
cuda.LaunchAsync(cudaComputeAutocor, partCount, nAutocorTasks, cudaStream); cuda.LaunchAsync(cudaComputeAutocor, partCount, nAutocorTasks, cudaStream);
cuda.LaunchAsync(cudaComputeLPC, 1, nAutocorTasks, cudaStream); cuda.LaunchAsync(cudaComputeLPC, 1, nAutocorTasks, cudaStream);
cuda.SynchronizeStream(cudaStream); //cuda.SynchronizeStream(cudaStream);
//cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream1); //cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream1);
} }
@@ -1709,31 +1714,33 @@ namespace CUETools.Codecs.FlaCuda
case 0: case 0:
do_midside = false; do_midside = false;
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_prediction_order = 4; max_prediction_order = 6;
max_partition_order = 2; max_partition_order = 4;
break; break;
case 1: case 1:
do_midside = false; do_midside = false;
max_prediction_order = 4; window_function = WindowFunction.Bartlett;
max_partition_order = 3; max_prediction_order = 8;
max_partition_order = 6;
break; break;
case 2: case 2:
do_midside = false; do_midside = false;
window_function = WindowFunction.Bartlett; max_partition_order = 6;
max_partition_order = 4;
max_prediction_order = 8; max_prediction_order = 8;
break; break;
case 3: case 3:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_prediction_order = 6; max_partition_order = 4;
max_prediction_order = 4;
break; break;
case 4: case 4:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_prediction_order = 8; max_partition_order = 4;
max_prediction_order = 7;
break; break;
case 5: case 5:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_prediction_order = 10; max_prediction_order = 8;
break; break;
case 6: case 6:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;