lossywav support

This commit is contained in:
chudov
2009-09-17 22:15:11 +00:00
parent 7796ef40c6
commit d8957f6a3f
3 changed files with 464 additions and 535 deletions

View File

@@ -98,9 +98,6 @@ namespace CUETools.Codecs.FlaCuda
CUdeviceptr cudaWindow; CUdeviceptr cudaWindow;
int nResidualTasks = 0;
int nAutocorTasks = 0;
bool encode_on_cpu = true; bool encode_on_cpu = true;
public const int MAX_BLOCKSIZE = 4608 * 4; public const int MAX_BLOCKSIZE = 4608 * 4;
@@ -820,8 +817,9 @@ namespace CUETools.Codecs.FlaCuda
unsafe void initialize_autocorTasks(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task) unsafe void initialize_autocorTasks(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
{ {
computeAutocorTaskStruct* autocorTasks = (computeAutocorTaskStruct*)task.autocorTasksPtr; computeAutocorTaskStruct* autocorTasks = (computeAutocorTaskStruct*)task.autocorTasksPtr;
nAutocorTasks = 0; task.nAutocorTasks = 0;
nResidualTasks = 0; task.nResidualTasks = 0;
task.nResidualTasksPerChannel = (_windowcount * max_order + 6 + 7) & ~7;
for (int iFrame = 0; iFrame < nFrames; iFrame++) for (int iFrame = 0; iFrame < nFrames; iFrame++)
{ {
for (int ch = 0; ch < channelsCount; ch++) for (int ch = 0; ch < channelsCount; ch++)
@@ -829,83 +827,94 @@ namespace CUETools.Codecs.FlaCuda
for (int iWindow = 0; iWindow < _windowcount; iWindow++) for (int iWindow = 0; iWindow < _windowcount; iWindow++)
{ {
// Autocorelation task // Autocorelation task
autocorTasks[nAutocorTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; autocorTasks[task.nAutocorTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
autocorTasks[nAutocorTasks].windowOffs = iWindow * 2 * FlaCudaWriter.MAX_BLOCKSIZE; autocorTasks[task.nAutocorTasks].windowOffs = iWindow * 2 * FlaCudaWriter.MAX_BLOCKSIZE;
autocorTasks[nAutocorTasks].residualOffs = max_order * (iWindow + (_windowcount + 1) * (ch + iFrame * channelsCount)); autocorTasks[task.nAutocorTasks].residualOffs = max_order * iWindow + task.nResidualTasksPerChannel * (ch + iFrame * channelsCount);
autocorTasks[nAutocorTasks].blocksize = blocksize; autocorTasks[task.nAutocorTasks].blocksize = blocksize;
nAutocorTasks++; task.nAutocorTasks++;
// LPC tasks // LPC tasks
for (int order = 1; order <= max_order; order++) for (int order = 1; order <= max_order; order++)
{ {
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.LPC; task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.LPC;
task.ResidualTasks[nResidualTasks].channel = ch; task.ResidualTasks[task.nResidualTasks].channel = ch;
task.ResidualTasks[nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0); task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
task.ResidualTasks[nResidualTasks].blocksize = blocksize; task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
task.ResidualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0; task.ResidualTasks[task.nResidualTasks].residualOrder = order <= max_order ? order : 0;
task.ResidualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
task.ResidualTasks[nResidualTasks].residualOffs = task.ResidualTasks[nResidualTasks].samplesOffs; task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
nResidualTasks++; task.nResidualTasks++;
} }
} }
// Fixed prediction // Constant frames
for (int order = 1; order <= max_order; order++)
{ {
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Verbatim; task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Constant;
task.ResidualTasks[nResidualTasks].channel = ch; task.ResidualTasks[task.nResidualTasks].channel = ch;
task.ResidualTasks[nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0); task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
task.ResidualTasks[nResidualTasks].blocksize = blocksize; task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
task.ResidualTasks[nResidualTasks].residualOrder = 0; task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
task.ResidualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
task.ResidualTasks[nResidualTasks].residualOffs = task.ResidualTasks[nResidualTasks].samplesOffs; task.ResidualTasks[task.nResidualTasks].residualOrder = 1;
task.ResidualTasks[nResidualTasks].shift = 0; task.ResidualTasks[task.nResidualTasks].shift = 0;
task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
task.nResidualTasks++;
}
// Fixed prediction
for (int order = 0; order < 5; order++)
{
task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Fixed;
task.ResidualTasks[task.nResidualTasks].channel = ch;
task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
task.ResidualTasks[task.nResidualTasks].residualOrder = order;
task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
task.ResidualTasks[task.nResidualTasks].shift = 0;
switch (order) switch (order)
{ {
case 0:
break;
case 1: case 1:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Constant; task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
task.ResidualTasks[nResidualTasks].residualOrder = 1;
task.ResidualTasks[nResidualTasks].coefs[0] = 1;
break;
case 3:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Fixed;
task.ResidualTasks[nResidualTasks].residualOrder = 0;
break;
case 4:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Fixed;
task.ResidualTasks[nResidualTasks].residualOrder = 1;
task.ResidualTasks[nResidualTasks].coefs[0] = 1;
break; break;
case 2: case 2:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Fixed; task.ResidualTasks[task.nResidualTasks].coefs[1] = 2;
task.ResidualTasks[nResidualTasks].residualOrder = 2; task.ResidualTasks[task.nResidualTasks].coefs[0] = -1;
task.ResidualTasks[nResidualTasks].coefs[1] = 2;
task.ResidualTasks[nResidualTasks].coefs[0] = -1;
break; break;
case 5: case 3:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Fixed; task.ResidualTasks[task.nResidualTasks].coefs[2] = 3;
task.ResidualTasks[nResidualTasks].residualOrder = 3; task.ResidualTasks[task.nResidualTasks].coefs[1] = -3;
task.ResidualTasks[nResidualTasks].coefs[2] = 3; task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
task.ResidualTasks[nResidualTasks].coefs[1] = -3;
task.ResidualTasks[nResidualTasks].coefs[0] = 1;
break; break;
case 6: case 4:
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.Fixed; task.ResidualTasks[task.nResidualTasks].coefs[3] = 4;
task.ResidualTasks[nResidualTasks].residualOrder = 4; task.ResidualTasks[task.nResidualTasks].coefs[2] = -6;
task.ResidualTasks[nResidualTasks].coefs[3] = 4; task.ResidualTasks[task.nResidualTasks].coefs[1] = 4;
task.ResidualTasks[nResidualTasks].coefs[2] = -6; task.ResidualTasks[task.nResidualTasks].coefs[0] = -1;
task.ResidualTasks[nResidualTasks].coefs[1] = 4;
task.ResidualTasks[nResidualTasks].coefs[0] = -1;
break; break;
} }
nResidualTasks++; task.nResidualTasks++;
}
// Filler
while ((task.nResidualTasks % task.nResidualTasksPerChannel) != 0)
{
task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Verbatim;
task.ResidualTasks[task.nResidualTasks].channel = ch;
task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
task.ResidualTasks[task.nResidualTasks].residualOrder = 0;
task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
task.ResidualTasks[task.nResidualTasks].shift = 0;
task.nResidualTasks++;
} }
} }
} }
if (sizeof(encodeResidualTaskStruct) * nResidualTasks > task.residualTasksLen) if (sizeof(encodeResidualTaskStruct) * task.nResidualTasks > task.residualTasksLen)
throw new Exception("oops"); throw new Exception("oops");
if (sizeof(computeAutocorTaskStruct) * nAutocorTasks > task.autocorTasksLen) if (sizeof(computeAutocorTaskStruct) * task.nAutocorTasks > task.autocorTasksLen)
throw new Exception("oops"); throw new Exception("oops");
cuda.CopyHostToDeviceAsync(task.cudaAutocorTasks, task.autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * nAutocorTasks), task.stream); cuda.CopyHostToDeviceAsync(task.cudaAutocorTasks, task.autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * task.nAutocorTasks), task.stream);
cuda.CopyHostToDeviceAsync(task.cudaResidualTasks, task.residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), task.stream); cuda.CopyHostToDeviceAsync(task.cudaResidualTasks, task.residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * task.nResidualTasks), task.stream);
task.blocksize = blocksize; task.blocksize = blocksize;
} }
@@ -983,6 +992,7 @@ namespace CUETools.Codecs.FlaCuda
{ {
frame.subframes[ch].best.type = SubframeType.Verbatim; frame.subframes[ch].best.type = SubframeType.Verbatim;
frame.subframes[ch].best.size = frame.subframes[ch].obits * (uint)frame.blocksize; frame.subframes[ch].best.size = frame.subframes[ch].obits * (uint)frame.blocksize;
frame.subframes[ch].wbits = 0;
int index = ch + iFrame * channels; int index = ch + iFrame * channels;
if (task.BestResidualTasks[index].size < 0) if (task.BestResidualTasks[index].size < 0)
@@ -994,6 +1004,11 @@ namespace CUETools.Codecs.FlaCuda
frame.subframes[ch].best.order = task.BestResidualTasks[index].residualOrder; frame.subframes[ch].best.order = task.BestResidualTasks[index].residualOrder;
frame.subframes[ch].best.cbits = task.BestResidualTasks[index].cbits; frame.subframes[ch].best.cbits = task.BestResidualTasks[index].cbits;
frame.subframes[ch].best.shift = task.BestResidualTasks[index].shift; frame.subframes[ch].best.shift = task.BestResidualTasks[index].shift;
frame.subframes[ch].obits -= (uint)task.BestResidualTasks[index].wbits;
frame.subframes[ch].wbits = (uint)task.BestResidualTasks[index].wbits;
if (frame.subframes[ch].wbits != 0)
for (int i = 0; i < frame.blocksize; i++)
frame.subframes[ch].samples[i] >>= (int)frame.subframes[ch].wbits;
for (int i = 0; i < task.BestResidualTasks[index].residualOrder; i++) for (int i = 0; i < task.BestResidualTasks[index].residualOrder; i++)
frame.subframes[ch].best.coefs[i] = task.BestResidualTasks[index].coefs[task.BestResidualTasks[index].residualOrder - 1 - i]; frame.subframes[ch].best.coefs[i] = task.BestResidualTasks[index].coefs[task.BestResidualTasks[index].residualOrder - 1 - i];
if (!encode_on_cpu) if (!encode_on_cpu)
@@ -1010,17 +1025,17 @@ namespace CUETools.Codecs.FlaCuda
compute_autocorellation(blocksize, channelsCount, max_order, nFrames, task); compute_autocorellation(blocksize, channelsCount, max_order, nFrames, task);
int threads_y; int threads_y;
if (max_order >= 4 && max_order <= 8) if (task.nResidualTasksPerChannel >= 4 && task.nResidualTasksPerChannel <= 8)
threads_y = max_order; threads_y = task.nResidualTasksPerChannel;
else if ((max_order % 8) == 0) else if ((task.nResidualTasksPerChannel % 8) == 0)
threads_y = 8; threads_y = 8;
else if ((max_order % 7) == 0) else if ((task.nResidualTasksPerChannel % 7) == 0)
threads_y = 7; threads_y = 7;
else if ((max_order % 6) == 0) else if ((task.nResidualTasksPerChannel % 6) == 0)
threads_y = 6; threads_y = 6;
else if ((max_order % 5) == 0) else if ((task.nResidualTasksPerChannel % 5) == 0)
threads_y = 5; threads_y = 5;
else if ((max_order % 4) == 0) else if ((task.nResidualTasksPerChannel % 4) == 0)
threads_y = 4; threads_y = 4;
else else
throw new Exception("invalid LPC order"); throw new Exception("invalid LPC order");
@@ -1039,30 +1054,23 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetParameterSize(task.cudaEstimateResidual, sizeof(uint) * 6); cuda.SetParameterSize(task.cudaEstimateResidual, sizeof(uint) * 6);
cuda.SetFunctionBlockShape(task.cudaEstimateResidual, 32, threads_y, 1); cuda.SetFunctionBlockShape(task.cudaEstimateResidual, 32, threads_y, 1);
cuda.SetParameter(task.cudaSumResidual, 0, (uint)task.cudaResidualTasks.Pointer); int nBestTasks = task.nResidualTasks / task.nResidualTasksPerChannel;
cuda.SetParameter(task.cudaSumResidual, sizeof(uint), (uint)task.cudaResidualOutput.Pointer);
cuda.SetParameter(task.cudaSumResidual, 2 * sizeof(uint), (uint)partCount);
cuda.SetParameterSize(task.cudaSumResidual, sizeof(uint) * 3U);
cuda.SetFunctionBlockShape(task.cudaSumResidual, 64, 1, 1);
int tasksPerChannel = (_windowcount + 1) * max_order;
int nBestTasks = nResidualTasks / tasksPerChannel;
cuda.SetParameter(task.cudaChooseBestMethod, 0 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaChooseBestMethod, 0 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaChooseBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualOutput.Pointer); cuda.SetParameter(task.cudaChooseBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualOutput.Pointer);
cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)partCount); cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)partCount);
cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)tasksPerChannel); cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameterSize(task.cudaChooseBestMethod, sizeof(uint) * 4U); cuda.SetParameterSize(task.cudaChooseBestMethod, sizeof(uint) * 4U);
cuda.SetFunctionBlockShape(task.cudaChooseBestMethod, 32, 16, 1); cuda.SetFunctionBlockShape(task.cudaChooseBestMethod, 32, 16, 1);
cuda.SetParameter(task.cudaCopyBestMethod, 0, (uint)task.cudaBestResidualTasks.Pointer); cuda.SetParameter(task.cudaCopyBestMethod, 0, (uint)task.cudaBestResidualTasks.Pointer);
cuda.SetParameter(task.cudaCopyBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaCopyBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaCopyBestMethod, 2 * sizeof(uint), (uint)tasksPerChannel); cuda.SetParameter(task.cudaCopyBestMethod, 2 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameterSize(task.cudaCopyBestMethod, sizeof(uint) * 3U); cuda.SetParameterSize(task.cudaCopyBestMethod, sizeof(uint) * 3U);
cuda.SetFunctionBlockShape(task.cudaCopyBestMethod, 64, 1, 1); cuda.SetFunctionBlockShape(task.cudaCopyBestMethod, 64, 1, 1);
cuda.SetParameter(task.cudaCopyBestMethodStereo, 0, (uint)task.cudaBestResidualTasks.Pointer); cuda.SetParameter(task.cudaCopyBestMethodStereo, 0, (uint)task.cudaBestResidualTasks.Pointer);
cuda.SetParameter(task.cudaCopyBestMethodStereo, 1 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaCopyBestMethodStereo, 1 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaCopyBestMethodStereo, 2 * sizeof(uint), (uint)tasksPerChannel); cuda.SetParameter(task.cudaCopyBestMethodStereo, 2 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameterSize(task.cudaCopyBestMethodStereo, sizeof(uint) * 3U); cuda.SetParameterSize(task.cudaCopyBestMethodStereo, sizeof(uint) * 3U);
cuda.SetFunctionBlockShape(task.cudaCopyBestMethodStereo, 64, 1, 1); cuda.SetFunctionBlockShape(task.cudaCopyBestMethodStereo, 64, 1, 1);
@@ -1073,8 +1081,7 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetFunctionBlockShape(task.cudaEncodeResidual, partSize, 1, 1); cuda.SetFunctionBlockShape(task.cudaEncodeResidual, partSize, 1, 1);
// issue work to the GPU // issue work to the GPU
cuda.LaunchAsync(task.cudaEstimateResidual, partCount, (nResidualTasks / threads_y * nFrames) / maxFrames, task.stream); cuda.LaunchAsync(task.cudaEstimateResidual, partCount, (task.nResidualTasks / threads_y * nFrames) / maxFrames, task.stream);
//cuda.LaunchAsync(task.cudaSumResidual, 1, (nResidualTasks * nFrames) / maxFrames, task.stream);
cuda.LaunchAsync(task.cudaChooseBestMethod, 1, (nBestTasks * nFrames) / maxFrames, task.stream); cuda.LaunchAsync(task.cudaChooseBestMethod, 1, (nBestTasks * nFrames) / maxFrames, task.stream);
if (channels == 2 && channelsCount == 4) if (channels == 2 && channelsCount == 4)
{ {
@@ -1103,6 +1110,18 @@ namespace CUETools.Codecs.FlaCuda
if (blocksize <= 4) if (blocksize <= 4)
return; return;
cuda.SetParameter(task.cudaStereoDecorr, 0, (uint)task.cudaSamples.Pointer);
cuda.SetParameter(task.cudaStereoDecorr, sizeof(uint), (uint)MAX_BLOCKSIZE);
cuda.SetParameterSize(task.cudaStereoDecorr, sizeof(uint) * 2U);
cuda.SetFunctionBlockShape(task.cudaStereoDecorr, 256, 1, 1);
cuda.SetParameter(task.cudaFindWastedBits, 0 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaFindWastedBits, 1 * sizeof(uint), (uint)task.cudaSamples.Pointer);
cuda.SetParameter(task.cudaFindWastedBits, 2 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameter(task.cudaFindWastedBits, 3 * sizeof(uint), (uint)blocksize);
cuda.SetParameterSize(task.cudaFindWastedBits, sizeof(uint) * 4U);
cuda.SetFunctionBlockShape(task.cudaFindWastedBits, 256, 1, 1);
cuda.SetParameter(task.cudaComputeAutocor, 0, (uint)task.cudaAutocorOutput.Pointer); cuda.SetParameter(task.cudaComputeAutocor, 0, (uint)task.cudaAutocorOutput.Pointer);
cuda.SetParameter(task.cudaComputeAutocor, sizeof(uint), (uint)task.cudaSamples.Pointer); cuda.SetParameter(task.cudaComputeAutocor, sizeof(uint), (uint)task.cudaSamples.Pointer);
cuda.SetParameter(task.cudaComputeAutocor, sizeof(uint) * 2, (uint)cudaWindow.Pointer); cuda.SetParameter(task.cudaComputeAutocor, sizeof(uint) * 2, (uint)cudaWindow.Pointer);
@@ -1122,8 +1141,11 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetFunctionBlockShape(task.cudaComputeLPC, (partCount + 31) & ~31, 1, 1); cuda.SetFunctionBlockShape(task.cudaComputeLPC, (partCount + 31) & ~31, 1, 1);
// issue work to the GPU // issue work to the GPU
cuda.LaunchAsync(task.cudaComputeAutocor, partCount, (nAutocorTasks * nFrames) / maxFrames, task.stream); if (channels == 2 && channelsCount == 4)
cuda.LaunchAsync(task.cudaComputeLPC, 1, (nAutocorTasks * nFrames) / maxFrames, task.stream); cuda.LaunchAsync(task.cudaStereoDecorr, MAX_BLOCKSIZE / 256, 1, task.stream);
cuda.LaunchAsync(task.cudaFindWastedBits, (task.nResidualTasks / task.nResidualTasksPerChannel * nFrames) / maxFrames, 1, task.stream);
cuda.LaunchAsync(task.cudaComputeAutocor, partCount, (task.nAutocorTasks * nFrames) / maxFrames, task.stream);
cuda.LaunchAsync(task.cudaComputeLPC, 1, (task.nAutocorTasks * nFrames) / maxFrames, task.stream);
} }
unsafe int encode_frame(bool doMidside, int channelCount, int iFrame, FlaCudaTask task) unsafe int encode_frame(bool doMidside, int channelCount, int iFrame, FlaCudaTask task)
@@ -1167,7 +1189,7 @@ namespace CUETools.Codecs.FlaCuda
bool doMidside = channels == 2 && eparams.do_midside; bool doMidside = channels == 2 && eparams.do_midside;
int channelCount = doMidside ? 2 * channels : channels; int channelCount = doMidside ? 2 * channels : channels;
cuda.CopyHostToDeviceAsync(task.cudaSamples, task.samplesBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelCount), task.stream); cuda.CopyHostToDeviceAsync(task.cudaSamples, task.samplesBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channels), task.stream);
} }
unsafe void run_GPU_task(int nFrames, FlaCudaTask task) unsafe void run_GPU_task(int nFrames, FlaCudaTask task)
@@ -1761,21 +1783,22 @@ namespace CUETools.Codecs.FlaCuda
public int best_index; public int best_index;
public int channel; public int channel;
public int residualOffs; public int residualOffs;
public fixed int reserved[5]; public int wbits;
public fixed int reserved[4];
public fixed int coefs[32]; public fixed int coefs[32];
}; };
internal class FlaCudaTask internal class FlaCudaTask
{ {
CUDA cuda; CUDA cuda;
public CUfunction cudaStereoDecorr;
public CUfunction cudaFindWastedBits;
public CUfunction cudaComputeAutocor; public CUfunction cudaComputeAutocor;
public CUfunction cudaComputeLPC; public CUfunction cudaComputeLPC;
public CUfunction cudaEstimateResidual; public CUfunction cudaEstimateResidual;
public CUfunction cudaChooseBestMethod; public CUfunction cudaChooseBestMethod;
public CUfunction cudaCopyBestMethod; public CUfunction cudaCopyBestMethod;
public CUfunction cudaCopyBestMethodStereo; public CUfunction cudaCopyBestMethodStereo;
//public CUfunction cudaSumResidualChunks;
public CUfunction cudaSumResidual;
public CUfunction cudaEncodeResidual; public CUfunction cudaEncodeResidual;
public CUdeviceptr cudaSamples; public CUdeviceptr cudaSamples;
public CUdeviceptr cudaResidual; public CUdeviceptr cudaResidual;
@@ -1797,6 +1820,9 @@ namespace CUETools.Codecs.FlaCuda
public int residualTasksLen; public int residualTasksLen;
public int bestResidualTasksLen; public int bestResidualTasksLen;
public int samplesBufferLen; public int samplesBufferLen;
public int nResidualTasks = 0;
public int nAutocorTasks = 0;
public int nResidualTasksPerChannel = 0;
unsafe public FlaCudaTask(CUDA _cuda, int channelCount) unsafe public FlaCudaTask(CUDA _cuda, int channelCount)
{ {
@@ -1834,14 +1860,14 @@ namespace CUETools.Codecs.FlaCuda
} }
cudaComputeAutocor = cuda.GetModuleFunction("cudaComputeAutocor"); cudaComputeAutocor = cuda.GetModuleFunction("cudaComputeAutocor");
cudaStereoDecorr = cuda.GetModuleFunction("cudaStereoDecorr");
cudaFindWastedBits = cuda.GetModuleFunction("cudaFindWastedBits");
cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC"); cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC");
cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual"); cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual");
cudaSumResidual = cuda.GetModuleFunction("cudaSumResidual");
cudaChooseBestMethod = cuda.GetModuleFunction("cudaChooseBestMethod"); cudaChooseBestMethod = cuda.GetModuleFunction("cudaChooseBestMethod");
cudaCopyBestMethod = cuda.GetModuleFunction("cudaCopyBestMethod"); cudaCopyBestMethod = cuda.GetModuleFunction("cudaCopyBestMethod");
cudaCopyBestMethodStereo = cuda.GetModuleFunction("cudaCopyBestMethodStereo"); cudaCopyBestMethodStereo = cuda.GetModuleFunction("cudaCopyBestMethodStereo");
cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual"); cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual");
//cudaSumResidualChunks = cuda.GetModuleFunction("cudaSumResidualChunks");
stream = cuda.CreateStream(); stream = cuda.CreateStream();
verifyBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * channelCount]; // should be channels, not channelCount. And should null if not doing verify! verifyBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * channelCount]; // should be channels, not channelCount. And should null if not doing verify!

View File

@@ -49,10 +49,59 @@ typedef struct
int best_index; int best_index;
int channel; int channel;
int residualOffs; int residualOffs;
int reserved[5]; int wbits;
int reserved[4];
int coefs[32]; int coefs[32];
} encodeResidualTaskStruct; } encodeResidualTaskStruct;
extern "C" __global__ void cudaStereoDecorr(
int *samples,
int offset
)
{
const int pos = blockIdx.x * blockDim.x + threadIdx.x;
if (pos < offset)
{
int l = samples[pos];
int r = samples[offset + pos];
samples[2 * offset + pos] = (l + r) >> 1;
samples[3 * offset + pos] = l - r;
}
}
extern "C" __global__ void cudaFindWastedBits(
encodeResidualTaskStruct *tasks,
int *samples,
int tasksPerChannel,
int blocksize
)
{
__shared__ struct {
volatile int wbits[256];
encodeResidualTaskStruct task;
} shared;
if (threadIdx.x < 16)
((int*)&shared.task)[threadIdx.x] = ((int*)(&tasks[blockIdx.x * tasksPerChannel]))[threadIdx.x];
shared.wbits[threadIdx.x] = 0;
__syncthreads();
for (int pos = 0; pos < blocksize; pos += blockDim.x)
shared.wbits[threadIdx.x] |= pos + threadIdx.x < blocksize ? samples[shared.task.samplesOffs + pos + threadIdx.x] : 0;
__syncthreads();
if (threadIdx.x < 128) shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 128]; __syncthreads();
if (threadIdx.x < 64) shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 64]; __syncthreads();
if (threadIdx.x < 32) shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 32]; __syncthreads();
shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 16];
shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 8];
shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 4];
shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 2];
shared.wbits[threadIdx.x] |= shared.wbits[threadIdx.x + 1];
if (threadIdx.x < tasksPerChannel)
tasks[blockIdx.x * tasksPerChannel + threadIdx.x].wbits = max(0,__ffs(shared.wbits[0]) - 1);
}
extern "C" __global__ void cudaComputeAutocor( extern "C" __global__ void cudaComputeAutocor(
float *output, float *output,
const int *samples, const int *samples,
@@ -231,8 +280,8 @@ extern "C" __global__ void cudaEstimateResidual(
const int dataLen = min(frameSize - pos, partSize + max_order); const int dataLen = min(frameSize - pos, partSize + max_order);
// fetch samples // fetch samples
shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] : 0; shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] >> shared.task[0].wbits : 0;
if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task[0].samplesOffs + pos + tid + partSize] : 0; if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task[0].samplesOffs + pos + tid + partSize] >> shared.task[0].wbits : 0;
const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize)); const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize));
__syncthreads(); __syncthreads();
@@ -271,100 +320,6 @@ extern "C" __global__ void cudaEstimateResidual(
output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid]; output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid];
} }
// blockDim.x == 256
// gridDim.x = frameSize / chunkSize
extern "C" __global__ void cudaSumResidualChunks(
int *output,
encodeResidualTaskStruct *tasks,
int *residual,
int frameSize,
int chunkSize // <= blockDim.x(256)
)
{
__shared__ struct {
int residual[256];
int rice[32];
} shared;
// fetch parameters
const int tid = threadIdx.x;
const int residualOrder = tasks[blockIdx.y].residualOrder;
const int chunkNumber = blockIdx.x;
const int pos = chunkNumber * chunkSize;
const int residualLen = min(frameSize - pos - residualOrder, chunkSize);
// set upper residuals to zero, in case blockDim < 256
shared.residual[255 - tid] = 0;
// read residual
int res = (tid < residualLen) ? residual[blockIdx.y * 8192 + pos + tid] : 0;
// convert to unsigned
shared.residual[tid] = (2 * res) ^ (res >> 31);
__syncthreads();
// residual sum: reduction in shared mem
if (tid < 128) shared.residual[tid] += shared.residual[tid + 128]; __syncthreads();
if (tid < 64) shared.residual[tid] += shared.residual[tid + 64]; __syncthreads();
if (tid < 32) shared.residual[tid] += shared.residual[tid + 32]; __syncthreads();
shared.residual[tid] += shared.residual[tid + 16];
shared.residual[tid] += shared.residual[tid + 8];
shared.residual[tid] += shared.residual[tid + 4];
shared.residual[tid] += shared.residual[tid + 2];
shared.residual[tid] += shared.residual[tid + 1];
if (tid < 32)
{
// rice parameter search
shared.rice[tid] = __mul24(tid >= 15, 0x7fffff) + residualLen * (tid + 1) + ((shared.residual[0] - (residualLen >> 1)) >> tid);
shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 8]);
shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 4]);
shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 2]);
shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 1]);
}
// write output
if (tid == 0)
output[blockIdx.x + blockIdx.y * gridDim.x] = shared.rice[0];
}
extern "C" __global__ void cudaSumResidual(
encodeResidualTaskStruct *tasks,
int *residual,
int partCount // <= blockDim.y (256)
)
{
__shared__ struct {
volatile int partLen[256];
encodeResidualTaskStruct task;
} shared;
const int tid = threadIdx.x;
// fetch task data
if (tid < sizeof(encodeResidualTaskStruct) / sizeof(int))
((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y))[tid];
__syncthreads();
shared.partLen[tid] = (tid < partCount) ? residual[tid + partCount * blockIdx.y] : 0;
__syncthreads();
// length sum: reduction in shared mem
//if (tid < 128) shared.partLen[tid] += shared.partLen[tid + 128]; __syncthreads();
//if (tid < 64) shared.partLen[tid] += shared.partLen[tid + 64]; __syncthreads();
if (tid < 32) shared.partLen[tid] += shared.partLen[tid + 32]; __syncthreads();
shared.partLen[tid] += shared.partLen[tid + 16];
shared.partLen[tid] += shared.partLen[tid + 8];
shared.partLen[tid] += shared.partLen[tid + 4];
shared.partLen[tid] += shared.partLen[tid + 2];
shared.partLen[tid] += shared.partLen[tid + 1];
// return sum
if (tid == 0)
tasks[blockIdx.y].size = min(shared.task.obits * shared.task.blocksize,
shared.task.type == Fixed ? shared.task.residualOrder * shared.task.obits + 6 + shared.partLen[0] :
shared.task.type == LPC ? shared.task.residualOrder * shared.task.obits + 4 + 5 + shared.task.residualOrder * shared.task.cbits + 6 + (4 * partCount/2)/* << porder */ + shared.partLen[0] :
shared.task.type == Constant ? shared.task.obits * (1 + shared.task.blocksize * (shared.partLen[0] != 0)) :
shared.task.obits * shared.task.blocksize);
}
#define BEST_INDEX(a,b) ((a) + ((b) - (a)) * (shared.length[b] < shared.length[a])) #define BEST_INDEX(a,b) ((a) + ((b) - (a)) * (shared.length[b] < shared.length[a]))
extern "C" __global__ void cudaChooseBestMethod( extern "C" __global__ void cudaChooseBestMethod(
@@ -403,12 +358,13 @@ extern "C" __global__ void cudaChooseBestMethod(
// return sum // return sum
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
int obits = shared.task[threadIdx.y].obits - shared.task[threadIdx.y].wbits;
shared.length[task + threadIdx.y] = shared.length[task + threadIdx.y] =
min(shared.task[threadIdx.y].obits * shared.task[threadIdx.y].blocksize, min(obits * shared.task[threadIdx.y].blocksize,
shared.task[threadIdx.y].type == Fixed ? shared.task[threadIdx.y].residualOrder * shared.task[threadIdx.y].obits + 6 + shared.partLen[threadIdx.y * 32] : shared.task[threadIdx.y].type == Fixed ? shared.task[threadIdx.y].residualOrder * obits + 6 + shared.partLen[threadIdx.y * 32] :
shared.task[threadIdx.y].type == LPC ? shared.task[threadIdx.y].residualOrder * shared.task[threadIdx.y].obits + 4 + 5 + shared.task[threadIdx.y].residualOrder * shared.task[threadIdx.y].cbits + 6 + (4 * partCount/2)/* << porder */ + shared.partLen[threadIdx.y * 32] : shared.task[threadIdx.y].type == LPC ? shared.task[threadIdx.y].residualOrder * obits + 4 + 5 + shared.task[threadIdx.y].residualOrder * shared.task[threadIdx.y].cbits + 6 + (4 * partCount/2)/* << porder */ + shared.partLen[threadIdx.y * 32] :
shared.task[threadIdx.y].type == Constant ? shared.task[threadIdx.y].obits * (1 + shared.task[threadIdx.y].blocksize * (shared.partLen[threadIdx.y * 32] != 0)) : shared.task[threadIdx.y].type == Constant ? obits * (1 + shared.task[threadIdx.y].blocksize * (shared.partLen[threadIdx.y * 32] != 0)) :
shared.task[threadIdx.y].obits * shared.task[threadIdx.y].blocksize); obits * shared.task[threadIdx.y].blocksize);
} }
} }
//shared.index[threadIdx.x] = threadIdx.x; //shared.index[threadIdx.x] = threadIdx.x;
@@ -527,8 +483,8 @@ extern "C" __global__ void cudaEncodeResidual(
const int dataLen = min(shared.task.blocksize - pos, partSize + shared.task.residualOrder); const int dataLen = min(shared.task.blocksize - pos, partSize + shared.task.residualOrder);
// fetch samples // fetch samples
shared.data[tid] = tid < dataLen ? samples[shared.task.samplesOffs + pos + tid] : 0; shared.data[tid] = tid < dataLen ? samples[shared.task.samplesOffs + pos + tid] >> shared.task.wbits : 0;
if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task.samplesOffs + pos + tid + partSize] : 0; if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task.samplesOffs + pos + tid + partSize] >> shared.task.wbits : 0;
const int residualLen = max(0,min(shared.task.blocksize - pos - shared.task.residualOrder, partSize)); const int residualLen = max(0,min(shared.task.blocksize - pos - shared.task.residualOrder, partSize));
__syncthreads(); __syncthreads();

View File

@@ -110,79 +110,81 @@ code {
} }
} }
bincode { bincode {
0xd0800205 0x00400780 0xa0000209 0x04000780 0xd0800205 0x00400780 0xa0000205 0x04000780
0x30070405 0xc4100780 0x3006040d 0xc4100780 0x30070209 0xc4100780 0x3006020d 0xc4100780
0xa0000011 0x04000780 0x20000201 0x0400c780 0xa0000011 0x04000780 0x20000401 0x0400c780
0x30020805 0xc4100780 0x2000000d 0x04004780 0x30020809 0xc4100780 0x2000000d 0x04008780
0x00000005 0xc0000780 0x308109fd 0x644107c8 0x00000005 0xc0000780 0x308109fd 0x644107c8
0x00000609 0xc0000780 0xa0018003 0x00000000 0x00000609 0xc0000780 0xa0018003 0x00000000
0xa0004401 0x04200780 0x10018003 0x00000280 0xa0004401 0x04200780 0x10018003 0x00000280
0x40014e0d 0x00200780 0x3010060d 0xc4100780 0x40014e0d 0x00200780 0x3010060d 0xc4100780
0x60004e01 0x0020c780 0x20000001 0x04008780 0x60004e01 0x0020c780 0x20000001 0x04004780
0x3007000d 0xc4100780 0x30060001 0xc4100780 0x3007000d 0xc4100780 0x30060001 0xc4100780
0x20008600 0x2100ec00 0x20000201 0x04000780 0x20008600 0x2100ec00 0x20000401 0x04000780
0xd00e0001 0x80c00780 0x08045401 0xe4200780 0xd00e0001 0x80c00780 0x08045401 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa0004c0d 0x04200780 0x1000d201 0x0423c780 0xa0004c0d 0x04200780 0x1000d201 0x0423c780
0x40060215 0x00000780 0x30100a15 0xc4100780 0x40060215 0x00000780 0x30100a15 0xc4100780
0x6006000d 0x00014780 0x40054201 0x00200780 0x6006001d 0x00014780 0x40034201 0x00200780
0x30100015 0xc4100780 0x1000d201 0x0423c780 0x3010000d 0xc4100780 0x1000d201 0x0423c780
0x60044215 0x00214780 0x2100ee1c 0x2143f000 0x60024215 0x0020c780 0x2100ee00 0x2147f00c
0x20000a19 0x04010780 0x3007001d 0xac000780 0x20000a19 0x04010780 0x30000621 0xac000780
0x30060ffd 0x6c00c7c8 0xa0030003 0x00000000 0x300611fd 0x6c00c7c8 0xa0031003 0x00000000
0x1002f003 0x00000280 0xd011580d 0x20000780 0x10030003 0x00000280 0xd011580d 0x20000780
0x2d03e020 0x20088c20 0x30021021 0xc4100780 0x2d07e000 0x20008c00 0x30020001 0xc4100780
0x2000ca21 0x04220780 0xd00e1021 0x80c00780 0x2100ea24 0x1d00f400 0xd00e1225 0x80c00780
0x10030003 0x00000780 0x1000f821 0x0403c780 0x30001201 0xec000780 0x10031003 0x00000780
0x00020c0d 0xc0000782 0x0c001401 0xe4220780 0x1000f801 0x0403c780 0x00020c0d 0xc0000782
0x30820dfd 0x6c4107c8 0xa0043003 0x00000000 0x0c001401 0xe4200780 0x30820dfd 0x6c4107c8
0x10043003 0x00000280 0x2000d221 0x04218780 0xa0047003 0x00000000 0x10047003 0x00000280
0x0002100d 0xc0000780 0x30080ffd 0x6c00c7c8 0x2000d201 0x04218780 0x0002000d 0xc0000780
0xa0042003 0x00000000 0x10041003 0x00000280 0x300011fd 0x6c00c7c8 0xa0046003 0x00000000
0xd0115811 0x20000780 0x2000c00d 0x0420c784 0x10045003 0x00000280 0xd0115811 0x20000780
0x2106f21c 0x2007860c 0x3002060d 0xc4100780 0x2000d221 0x04218780 0x2000c001 0x0421c784
0x2000ca0d 0x0420c780 0xd00e060d 0x80c00780 0x2000001d 0x04020780 0x1000d401 0x0423c784
0x10042003 0x00000780 0x1000f80d 0x0403c780 0x30020e1d 0xc4100780 0x2000ca1d 0x0421c780
0x0c001401 0xe420c782 0xd411500d 0x20000782 0xd00e0e1d 0x80c00780 0x30000e01 0xec000780
0x3c00c00d 0x04200780 0xd4005005 0x20000780 0x10046003 0x00000780 0x1000f801 0x0403c780
0x0c001401 0xe4200782 0xd411500d 0x20000782
0x3c00c00d 0x0420c780 0xd4005005 0x20000780
0x861ffe03 0x00000000 0x3004cffd 0x6420c7c8 0x861ffe03 0x00000000 0x3004cffd 0x6420c7c8
0xa0059003 0x00000000 0x00020c0d 0xc0000780 0xa005d003 0x00000000 0x00020c0d 0xc0000780
0x0c025401 0xe43f0780 0x10058003 0x00000280 0x0c025401 0xe43f0780 0x1005c003 0x00000280
0xa0004401 0x04200780 0x40014e1d 0x00200780 0xa0004401 0x04200780 0x40014e1d 0x00200780
0x30100e1d 0xc4100780 0x60004e01 0x0021c780 0x30100e1d 0xc4100780 0x60004e01 0x0021c780
0x20000001 0x04008780 0x3007001d 0xc4100780 0x20000001 0x04004780 0x3007001d 0xc4100780
0x30060001 0xc4100780 0x20008e00 0x2100ec00 0x30060001 0xc4100780 0x20008e00 0x2100ec00
0x20000201 0x04000780 0x20008001 0x00000007 0x20000401 0x04000780 0x20008001 0x00000007
0xd00e0001 0x80c00780 0x10059003 0x00000780 0xd00e0001 0x80c00780 0x1005d003 0x00000780
0x1000f801 0x0403c780 0x08047401 0xe4200782 0x1000f801 0x0403c780 0x08047401 0xe4200782
0xd4112809 0x20000780 0x3883c005 0x6c608780 0xd4112809 0x20000780 0x3883c009 0x6c608780
0xa0004401 0x04200780 0xd001001d 0x04000780 0xa0004401 0x04200780 0xd002001d 0x04000780
0x30000ffd 0x640187c8 0xa0094003 0x00000000 0x30000ffd 0x640187c8 0xa0098003 0x00000000
0x10092003 0x00000280 0x3003d201 0xac200780 0x10096003 0x00000280 0x3003d201 0xac200780
0x307c000d 0x8c000780 0xd4110009 0x20000780 0x307c000d 0x8c000780 0xd4110009 0x20000780
0x387cc1fd 0x6c20c7c8 0xa007c003 0x00000000 0x387cc1fd 0x6c20c7c8 0xa0080003 0x00000000
0x1000f801 0x0403c780 0x1000f825 0x0403c780 0x1000f801 0x0403c780 0x1000f821 0x0403c780
0x1007a003 0x00000280 0x30050e05 0xc4100780 0x1007e003 0x00000280 0x30050e09 0xc4100780
0x20000221 0x04010780 0x200a9005 0x00000003 0x20000425 0x04010780 0x200a9209 0x00000003
0x0002020d 0xc0000780 0xa0077003 0x00000000 0x0002040d 0xc0000780 0xa007b003 0x00000000
0xd4000009 0x20000780 0xd8118011 0x20000780 0xd4000009 0x20000780 0xd8118011 0x20000780
0x1000c005 0x0423c784 0x6e01c225 0x80224780 0x1000c009 0x0423c784 0x6e02c221 0x80220780
0x20018001 0x00000003 0xd4110011 0x20000780 0x20018001 0x00000003 0xd4110011 0x20000780
0x3000c1fd 0x6c2147cc 0xd8000809 0x20000780 0x3000c1fd 0x6c2147cc 0xd8000809 0x20000780
0x1006f003 0x00000280 0xd4110009 0x20000782 0x10073003 0x00000280 0xd4110009 0x20000782
0x1800c001 0x0423c780 0x1007c003 0x00000780 0x1800c001 0x0423c780 0x10080003 0x00000780
0x30050e05 0xc4100780 0x20000221 0x04010780 0x30050e09 0xc4100780 0x20000425 0x04010780
0x20001001 0x04000782 0x00020009 0xc0000780 0x20001201 0x04000782 0x00020009 0xc0000780
0xd411100d 0x20000780 0x1c00c001 0x0423c780 0xd411100d 0x20000780 0x1c00c001 0x0423c780
0x30001201 0xec000780 0x2840d401 0x04200780 0x30001001 0xec000780 0x2840d401 0x04200780
0x301f0005 0xec100780 0x30010001 0xc4100780 0x301f0009 0xec100780 0x30010001 0xc4100780
0xd0000205 0x04008780 0x30080601 0x6c010780 0xd0000421 0x04008780 0x30090601 0x6c010780
0x00020c0d 0xc0000780 0xdc095009 0x20000780 0x00020c0d 0xc0000780 0xdc095009 0x20000780
0xa0000021 0x2c014780 0x1800c001 0x0423c780 0xa0000009 0x2c014780 0x1800c001 0x0423c780
0x30840205 0xac400780 0x60011005 0x80000780 0x30841021 0xac400780 0x60080409 0x80000780
0x20018e1d 0x00000003 0xa0004401 0x04200780 0x20018e1d 0x00000003 0xa0004401 0x04200780
0x30000ffd 0x640147c8 0x0c025401 0xe4204780 0x30000ffd 0x640147c8 0x0c025401 0xe4208780
0x10063003 0x00000280 0x10094003 0x00000780 0x10067003 0x00000280 0x10098003 0x00000780
0x3003d201 0xac200780 0x307c000d 0x8c000780 0x3003d201 0xac200780 0x307c000d 0x8c000780
0x00020c0d 0xc0000782 0xdc095009 0x20000780 0x00020c0d 0xc0000782 0xdc095009 0x20000780
0x1800e001 0x0423c780 0x2800c001 0x04200780 0x1800e001 0x0423c780 0x2800c001 0x04200780
@@ -191,21 +193,21 @@ code {
0x0c025401 0xe4200780 0x1900e400 0x2900e000 0x0c025401 0xe4200780 0x1900e400 0x2900e000
0x0c025401 0xe4200780 0x1900e200 0x2900e000 0x0c025401 0xe4200780 0x1900e200 0x2900e000
0x0c025401 0xe4200780 0xd4112805 0x20000780 0x0c025401 0xe4200780 0xd4112805 0x20000780
0x347cc1fd 0x6c2147c8 0xa00ac003 0x00000000 0x347cc1fd 0x6c2147c8 0xa00b0003 0x00000000
0x100a9003 0x00000280 0x00020a05 0xc0000780 0x100ad003 0x00000280 0x00020a05 0xc0000780
0xd4095005 0x20000780 0x347cc1fd 0x6c2087c8 0xd4095005 0x20000780 0x347cc1fd 0x6c2087c8
0x100ab003 0x00000280 0x10018001 0x00000003 0x100af003 0x00000280 0x10018001 0x00000003
0x100ac003 0x00000780 0x1000f801 0x0403c780 0x100b0003 0x00000780 0x1000f801 0x0403c780
0xf0000001 0xe0000002 0x20018805 0x00000003 0xf0000001 0xe0000002 0x20018809 0x00000003
0x40030c1d 0x00000780 0x60020e1d 0x0001c780 0x40050c1d 0x00000780 0x60040e1d 0x0001c780
0x00020a05 0xc0000780 0xd4095005 0x20000780 0x00020a05 0xc0000780 0xd4095005 0x20000780
0x30010615 0xec100780 0x30100e1d 0xc4100780 0x30010615 0xec100780 0x30100e1d 0xc4100780
0x2440c015 0x04214780 0x60020c0d 0x0001c780 0x2440c015 0x04214780 0x60040c0d 0x0001c780
0x307c09fd 0x640147c8 0x30040a05 0xec000780 0x307c09fd 0x640147c8 0x30040a09 0xec000780
0x30850811 0x64410780 0xa0000811 0x2c014780 0x30850811 0x64410780 0xa0000811 0x2c014780
0x407f8811 0x0007ffff 0x2004860c 0x20038204 0x407f8811 0x0007ffff 0x2004860c 0x20038408
0x4003000d 0x00000780 0x6002020d 0x0000c780 0x4005000d 0x00000780 0x6004020d 0x0000c780
0x3010060d 0xc4100780 0x60020001 0x0000c780 0x3010060d 0xc4100780 0x60040001 0x0000c780
0x00020c09 0xc0000780 0x08025401 0xe4200780 0x00020c09 0xc0000780 0x08025401 0xe4200780
0xd8095005 0x20000780 0x1400d001 0x0423c780 0xd8095005 0x20000780 0x1400d001 0x0423c780
0x3400c001 0xac200780 0x08025401 0xe4200780 0x3400c001 0xac200780 0x08025401 0xe4200780
@@ -214,9 +216,9 @@ code {
0x3400c001 0xac200780 0x08025401 0xe4200780 0x3400c001 0xac200780 0x08025401 0xe4200780
0x1400c201 0x0423c780 0x3400c001 0xac200780 0x1400c201 0x0423c780 0x3400c001 0xac200780
0x08025401 0xe4200780 0x30000003 0x00000280 0x08025401 0xe4200780 0x30000003 0x00000280
0xa0004401 0x04200780 0x40014e05 0x00200780 0xa0004401 0x04200780 0x40014e09 0x00200780
0x30100205 0xc4100780 0x60004e01 0x00204780 0x30100409 0xc4100780 0x60004e01 0x00208780
0x20000001 0x04008780 0x30060001 0xc4100780 0x20000001 0x04004780 0x30060001 0xc4100780
0xa0004c0d 0x04200780 0x20000601 0x04000780 0xa0004c0d 0x04200780 0x20000601 0x04000780
0x00020c0d 0xc0000780 0xdc095005 0x20000780 0x00020c0d 0xc0000780 0xdc095005 0x20000780
0x30020005 0xc4100780 0x1500e000 0x2101e804 0x30020005 0xc4100780 0x1500e000 0x2101e804
@@ -250,9 +252,9 @@ code {
0x30050205 0xc4100780 0x20000409 0x04004780 0x30050205 0xc4100780 0x20000409 0x04004780
0x00020405 0xc0000780 0x103f8005 0x07ffffff 0x00020405 0xc0000780 0x103f8005 0x07ffffff
0x04051001 0xe4204780 0x307ccffd 0x6c20c7ca 0x04051001 0xe4204780 0x307ccffd 0x6c20c7ca
0x100a3003 0x00000280 0x1000f811 0x0403c780 0x100a2003 0x00000280 0x1000f811 0x0403c780
0x20000815 0x0400c780 0x3005cffd 0x6420c7c8 0x20000815 0x0400c780 0x3005cffd 0x6420c7c8
0xa009f003 0x00000000 0x1009f003 0x00000280 0xa009e003 0x00000000 0x1009e003 0x00000280
0x1000ce05 0x0423c780 0x40034e09 0x00200780 0x1000ce05 0x0423c780 0x40034e09 0x00200780
0x30100409 0xc4100780 0x60024e1d 0x00208780 0x30100409 0xc4100780 0x60024e1d 0x00208780
0x30070605 0xc4100780 0x30060609 0xc4100780 0x30070605 0xc4100780 0x30060609 0xc4100780
@@ -286,133 +288,132 @@ code {
0x0c011001 0xe4204780 0x1000c205 0x0423c784 0x0c011001 0xe4204780 0x1000c205 0x0423c784
0x2000c005 0x04204784 0xa0000019 0x04000780 0x2000c005 0x04204784 0xa0000019 0x04000780
0x307c0dfd 0x640147c8 0x0c011001 0xe4204780 0x307c0dfd 0x640147c8 0x0c011001 0xe4204780
0x1009f003 0x00000280 0xd41c680d 0x20000780 0x1009e003 0x00000280 0xd41c680d 0x20000780
0x1d00e204 0x1d00e408 0x40050419 0x00000780 0x1d00ec08 0x1d00e404 0x2c40c209 0x04208780
0x60040619 0x00018780 0x30100c19 0xc4100780 0x40050419 0x00000780 0x60040619 0x00018780
0x3c82c1fd 0x6c6147c8 0x60040419 0x00018780 0x30100c19 0xc4100780 0x3c82c1fd 0x6c6147c8
0xa009c003 0x00000000 0x10070003 0x00000280 0x60040419 0x00018780 0xa009b003 0x00000000
0xd41c4005 0x20000780 0x1500e004 0x1500ec08 0x10071003 0x00000280 0xd41c4005 0x20000780
0x4005041d 0x00000780 0x6004061d 0x0001c780 0x1400c005 0x0423c780 0x4005041d 0x00000780
0x30100e1d 0xc4100780 0x60040405 0x0001c780 0x6004061d 0x0001c780 0x30100e1d 0xc4100780
0xd8044005 0x20000780 0x2400c005 0x04204780 0x60040405 0x0001c780 0xd8044005 0x20000780
0x20068205 0x00000003 0x1009c003 0x00000780 0x2400c005 0x04204780 0x20068205 0x00000003
0xd41c680d 0x20000780 0x3c83c1fd 0x6c6147c8 0x1009b003 0x00000780 0xd41c680d 0x20000780
0xa009b003 0x00000000 0x10085003 0x00000280 0x3c83c1fd 0x6c6147c8 0xa009a003 0x00000000
0x3002cc05 0xc4300780 0x301f0209 0xec100780 0x10085003 0x00000280 0x3002cc05 0xc4300780
0xd0840409 0x04400780 0x2000041d 0x04004780 0x301f021d 0xec100780 0xd0840e1d 0x04400780
0xd41c4005 0x20000780 0x1500ec08 0x1500e004 0x20000e1d 0x04004780 0xd41c4005 0x20000780
0x2400c609 0x04208780 0x40050421 0x00000780 0x2502e608 0x1500e004 0x40050421 0x00000780
0x60040621 0x00020780 0x30101021 0xc4100780 0x60040621 0x00020780 0x30101021 0xc4100780
0x60040405 0x00020780 0x30010e09 0xec100780 0x60040405 0x00020780 0x30010e09 0xec100780
0x20000205 0x04008780 0xd8044005 0x20000780 0x20000205 0x04008780 0xd8044005 0x20000780
0x2400c005 0x04204780 0x200f8205 0x00000003 0x2400c005 0x04204780 0x200f8205 0x00000003
0x1009b003 0x00000780 0xd41c680d 0x20000780 0x1009a003 0x00000780 0xd41c680d 0x20000780
0x3c7cc1fd 0x6c2147c8 0xa009a003 0x00000000 0x3c7cc1fd 0x6c2147c8 0xa0099003 0x00000000
0x10094003 0x00000280 0xd804400d 0x20000780 0x10093003 0x00000280 0xd804400d 0x20000780
0xd41c7005 0x20000780 0x3c7cc1fd 0x6c2087c8 0xd41c7805 0x20000780 0x3c7cc1fd 0x6c2087c8
0x2501e209 0x00000003 0x1400c005 0x0423c780 0x2501e005 0x00000003 0x10000805 0x2440c280
0x10000809 0x2440c280 0x4005041d 0x00000780
0x6004061d 0x0001c780 0x30100e1d 0xc4100780
0x60040405 0x0001c780 0x1009a003 0x00000780
0xd41c7005 0x20000780 0x1500e004 0x1500e208
0x4005041d 0x00000780 0x6004061d 0x0001c780 0x4005041d 0x00000780 0x6004061d 0x0001c780
0x30100e1d 0xc4100780 0x60040405 0x0001c780 0x30100e1d 0xc4100780 0x60040405 0x0001c780
0xf0000001 0xe0000002 0xf0000001 0xe0000002 0x10099003 0x00000780 0xd41c7805 0x20000780
0x30060205 0xac000782 0x00020a05 0xc0000780 0x1400c005 0x0423c780 0x4005041d 0x00000780
0x04051001 0xe4204780 0xa0004405 0x04200782 0x6004061d 0x0001c780 0x30100e1d 0xc4100780
0x20000811 0x04004780 0x3004cffd 0x6c2107c8 0x60040405 0x0001c780 0xf0000001 0xe0000002
0x10016003 0x00000280 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x30060205 0xac000782
0xa0000009 0x04000780 0xd0800209 0x00400780 0x00020a05 0xc0000780 0x04051001 0xe4204780
0xa0000405 0x04000780 0x30050205 0xc4100780 0xa0004405 0x04200782 0x20000811 0x04004780
0x20000409 0x04004780 0x308505fd 0x6c4107c8 0x3004cffd 0x6c2107c8 0x10016003 0x00000280
0xa00be003 0x00000000 0x100be003 0x00000280 0x861ffe03 0x00000000 0xa0000009 0x04000780
0xa0000009 0x04000780 0xd0800209 0x00400780 0xd0800209 0x00400780 0xa0000405 0x04000780
0xa0000405 0x04000780 0x30050205 0xc4100780 0x30050205 0xc4100780 0x20000409 0x04004780
0x20000409 0x04004780 0x00020405 0xc0000780 0x308505fd 0x6c4107c8 0xa00bd003 0x00000000
0xd418400d 0x20000780 0xd4144009 0x20000780 0x100bd003 0x00000280 0xa0000009 0x04000780
0x1c00c005 0x0423c780 0x3801c1fd 0x6c2107c8 0xd0800209 0x00400780 0xa0000405 0x04000780
0xa0000009 0x04000780 0xd0800209 0x00400780 0x30050205 0xc4100780 0x20000409 0x04004780
0xa0000405 0x04000780 0x30050205 0xc4100780 0x00020405 0xc0000780 0xd418400d 0x20000780
0x20000409 0x04004780 0x20008405 0x0000000b 0xd4144009 0x20000780 0x1c00c005 0x0423c780
0x10000405 0x0403c500 0x04001001 0xe4204780 0x3801c1fd 0x6c2107c8 0xa0000009 0x04000780
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xd0800209 0x00400780 0xa0000405 0x04000780
0xa0000009 0x04000780 0xd0800209 0x00400780 0x30050205 0xc4100780 0x20000409 0x04004780
0xa0000405 0x04000780 0x30050205 0xc4100780 0x20008405 0x0000000b 0x10000405 0x0403c500
0x20000409 0x04004780 0x308605fd 0x6c4107c8
0xa00d9003 0x00000000 0x100d9003 0x00000280
0xa0000009 0x04000780 0xd0800209 0x00400780
0xa0000405 0x04000780 0x30050205 0xc4100780
0x20000409 0x04004780 0x00020405 0xc0000780
0xd4024009 0x20000780 0x0802c00d 0xc0200780
0x0402d011 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd0144011 0x20000784
0x1d00e008 0x2941e004 0x3002c009 0x6c20c784
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0xf0000001 0xe0000002 0x04001001 0xe4204780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0000009 0x04000780 0x861ffe03 0x00000000 0xa0000009 0x04000780
0xd0800209 0x00400780 0xa0000405 0x04000780 0xd0800209 0x00400780 0xa0000405 0x04000780
0x30050205 0xc4100780 0x20000409 0x04004780 0x30050205 0xc4100780 0x20000409 0x04004780
0x308705fd 0x6c4107c8 0xa0128003 0x00000000 0x308605fd 0x6c4107c8 0xa00d8003 0x00000000
0x10128003 0x00000280 0xa0000009 0x04000780 0x100d8003 0x00000280 0xa0000009 0x04000780
0xd0800209 0x00400780 0xa0000405 0x04000780 0xd0800209 0x00400780 0xa0000405 0x04000780
0x30050205 0xc4100780 0x20000409 0x04004780 0x30050205 0xc4100780 0x20000409 0x04004780
0x00020405 0xc0000780 0xd4014009 0x20000780 0x00020405 0xc0000780 0xd4024009 0x20000780
0x0802c00d 0xc0200780 0x0402d011 0xc0200780 0x0802c00d 0xc0200780 0x0402d011 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780 0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd0144011 0x20000784 0x1d00e008 0x2941e004 0xd0144011 0x20000784 0x1d00e008 0x2941e004
0x3002c009 0x6c20c784 0xd0020205 0x04020780 0x3002c009 0x6c20c784 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780 0x2400d005 0x04204780 0x04001001 0xe4204780
0x0402f00d 0xc0200780 0x0402d009 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd8144009 0x20000780 0x1c00c009 0x0423c780
0x2440f005 0x04204780 0x3802c009 0x6c20c780
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0x0402e00d 0xc0200780
0x0402d009 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd8144009 0x20000780
0x1c00c009 0x0423c780 0x2440e005 0x04204780
0x3802c009 0x6c20c780 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780
0x0402d80d 0xc0200780 0x0402d009 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd8144009 0x20000780 0x1d00e008 0x2541f804
0x3802c009 0x6c20c780 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780
0x0402d40d 0xc0200780 0x0402d009 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd8144009 0x20000780 0x1d00e008 0x2541f404
0x3802c009 0x6c20c780 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780
0x0402d20d 0xc0200780 0x0402d009 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd8144009 0x20000780 0x1d00e008 0x2541f204
0x3802c009 0x6c20c780 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa0000009 0x04000780 0xd0800209 0x00400780 0xa0000009 0x04000780 0xd0800209 0x00400780
0xa0000405 0x04000780 0x30050205 0xc4100780 0xa0000405 0x04000780 0x30050205 0xc4100780
0x20000409 0x04004780 0x307c05fd 0x6c0147c8 0x20000409 0x04004780 0x308705fd 0x6c4107c8
0xa013c003 0x00000000 0x1013c003 0x00000280 0xa0127003 0x00000000 0x10127003 0x00000280
0xa0000009 0x04000780 0xd0800209 0x00400780
0xa0000405 0x04000780 0x30050205 0xc4100780
0x20000409 0x04004780 0x00020405 0xc0000780
0xd4014009 0x20000780 0x0802c00d 0xc0200780
0x0402d011 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd0144011 0x20000784
0x1d00e008 0x2941e004 0x3002c009 0x6c20c784
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0x0402f00d 0xc0200780
0x0402d009 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd8144009 0x20000780
0x1c00c009 0x0423c780 0x2440f005 0x04204780
0x3802c009 0x6c20c780 0xd0020205 0x04020780
0x2400d005 0x04204780 0x04001001 0xe4204780
0x0402e00d 0xc0200780 0x0402d009 0xc0200780
0xdc14400d 0x20000780 0x1400d005 0x0423c780
0xd8144009 0x20000780 0x1c00c009 0x0423c780
0x2440e005 0x04204780 0x3802c009 0x6c20c780
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0x0402d80d 0xc0200780
0x0402d009 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd8144009 0x20000780
0x1d00e008 0x2541f804 0x3802c009 0x6c20c780
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0x0402d40d 0xc0200780
0x0402d009 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd8144009 0x20000780
0x1d00e008 0x2541f404 0x3802c009 0x6c20c780
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0x0402d20d 0xc0200780
0x0402d009 0xc0200780 0xdc14400d 0x20000780
0x1400d005 0x0423c780 0xd8144009 0x20000780
0x1d00e008 0x2541f204 0x3802c009 0x6c20c780
0xd0020205 0x04020780 0x2400d005 0x04204780
0x04001001 0xe4204780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0000009 0x04000780
0xd0800209 0x00400780 0xa0000405 0x04000780
0x30050205 0xc4100780 0x20000409 0x04004780
0x307c05fd 0x6c0147c8 0xa013b003 0x00000000
0x1013b003 0x00000280 0x1000ce05 0x0423c780
0x40034e09 0x00200780 0x30100409 0xc4100780
0x60024e05 0x00208780 0x30070209 0xc4100780
0x3006020d 0xc4100780 0x20038408 0x2102e808
0x2000d00d 0x04204780 0x20208405 0x00000003
0xd00e020d 0xa0c00780 0xa0000009 0x04000782
0xd0800209 0x00400780 0xa0000405 0x04000780
0x30050205 0xc4100780 0x20000409 0x04004780
0x3002cffd 0x6c20c7c8 0x30000003 0x00000280
0x1000ce05 0x0423c780 0x40034e09 0x00200780 0x1000ce05 0x0423c780 0x40034e09 0x00200780
0x30100409 0xc4100780 0x60024e05 0x00208780 0x30100409 0xc4100780 0x60024e05 0x00208780
0x30070209 0xc4100780 0x3006020d 0xc4100780 0xa0000009 0x04000780 0xd0800219 0x00400780
0x20038408 0x2102e808 0x2000d00d 0x04204780 0xa0000c0d 0x04000780 0x3005060d 0xc4100780
0x20208405 0x00000003 0xd00e020d 0xa0c00780 0x20038408 0x20028204 0x3007020d 0xc4100780
0xa0000009 0x04000782 0xd0800209 0x00400780 0x30060205 0xc4100780 0xa0000009 0x04000780
0xa0000405 0x04000780 0x30050205 0xc4100780 0xd0800201 0x00400780 0xa0000001 0x04000780
0x20000409 0x04004780 0x3002cffd 0x6c20c7c8 0x30050001 0xc4100780 0x20000409 0x04000780
0x30000003 0x00000280 0x1000ce05 0x0423c780 0x00020405 0xc0000780 0x20000601 0x04004780
0x40034e09 0x00200780 0x30100409 0xc4100780 0xd4144005 0x20000780 0x2100e804 0x1500e000
0x60024e05 0x00208780 0xa0000009 0x04000780 0x20108205 0x00000003 0xd00e0201 0xa0c00781
0xd0800219 0x00400780 0xa0000c0d 0x04000780
0x3005060d 0xc4100780 0x20038408 0x20028204
0x3007020d 0xc4100780 0x30060205 0xc4100780
0xa0000009 0x04000780 0xd0800201 0x00400780
0xa0000001 0x04000780 0x30050001 0xc4100780
0x20000409 0x04000780 0x00020405 0xc0000780
0x20000601 0x04004780 0xd4144005 0x20000780
0x2100e804 0x1500e000 0x20108205 0x00000003
0xd00e0201 0xa0c00781
} }
} }
code { code {
@@ -431,48 +432,50 @@ code {
} }
} }
bincode { bincode {
0xa0000005 0x04000780 0x308003fd 0x644107c8 0xa0000009 0x04000780 0x308005fd 0x644107c8
0xa000d003 0x00000000 0x1000d003 0x00000280 0xa000d003 0x00000000 0x1000d003 0x00000280
0xa0004e01 0x04200780 0x30070009 0xc4100780 0xa0004e01 0x04200780 0x30070005 0xc4100780
0x30060001 0xc4100780 0x20000401 0x04000780 0x30060001 0xc4100780 0x20000201 0x04000780
0x30020209 0xc4100780 0x2100ec00 0x20008400 0x30020405 0xc4100780 0x2100ec00 0x20008200
0xd00e0001 0x80c00780 0x00020205 0xc0000780 0xd00e0001 0x80c00780 0x00020405 0xc0000780
0x04024e01 0xe4200780 0xf0000001 0xe0000002 0x04024e01 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0004201 0x04200780 0x861ffe03 0x00000000 0xa0004205 0x04200780
0x40014c09 0x00200780 0x30100409 0xc4100780 0x40034c01 0x00200780 0x30100001 0xc4100780
0xd0093805 0x20000780 0x60004c09 0x00208780 0xd0093805 0x20000780 0x60024c0d 0x00200780
0x2500e00c 0x2542ee10 0x3004060d 0xac000780 0x2501e000 0x2543ee10 0x30040011 0xac000780
0x300107fd 0x6c00c7c8 0xa0020003 0x00000000 0x300209fd 0x6c00c7c8 0xa0022003 0x00000000
0x1001f003 0x00000280 0xd0094005 0x20000780 0x10021003 0x00000280 0xd0094005 0x20000780
0x2502e010 0x20048210 0x30020811 0xc4100780 0x2503e000 0x20008400 0x30020001 0xc4100780
0x2000ca11 0x04210780 0xd00e0811 0x80c00780 0x2000ca01 0x04200780 0xd00e0015 0x80c00780
0x10020003 0x00000780 0x1000f811 0x0403c780 0x1400d401 0x0423c780 0x30000a01 0xec000780
0x00020205 0xc0000782 0x308103fd 0x6c4107c8 0x10022003 0x00000780 0x1000f801 0x0403c780
0x04000e01 0xe4210780 0xa0033003 0x00000000 0x00020405 0xc0000782 0x308105fd 0x6c4107c8
0x10033003 0x00000280 0x20000011 0x04004780 0x04000e01 0xe4200780 0xa0037003 0x00000000
0x300309fd 0x6c0187c8 0x00020805 0xc0000780 0x10037003 0x00000280 0x20000201 0x04008780
0xa0032003 0x00000000 0x10031003 0x00000280 0x300401fd 0x6c0187c8 0x00020005 0xc0000780
0xd0094009 0x20000780 0x2001800c 0x2902e010 0xa0036003 0x00000000 0x10035003 0x00000280
0x2000060d 0x04010780 0x3002060d 0xc4100780 0xd0094009 0x20000780 0x20028200 0x2903e010
0x2000ca0d 0x0420c780 0xd00e060d 0x80c00780 0x20000001 0x04010780 0x30020001 0xc4100780
0x10032003 0x00000780 0x1000f80d 0x0403c780 0x2000ca01 0x04200780 0xd00e0011 0x80c00780
0x04000e01 0xe420c782 0xd0093805 0x20000782 0x1800d401 0x0423c780 0x30000801 0xec000780
0x2542ee0c 0x3503e00c 0x30030001 0xac000780 0x10036003 0x00000780 0x1000f801 0x0403c780
0x04000e01 0xe4200782 0xd0093805 0x20000782
0x2543ee00 0x3500e000 0x30000201 0xac000780
0x307c0011 0x8c000780 0x861ffe03 0x00000000 0x307c0011 0x8c000780 0x861ffe03 0x00000000
0xd0093805 0x20000780 0x347cc1fd 0x6c20c7c8 0xd0093805 0x20000780 0x347cc1fd 0x6c20c7c8
0x1000f80d 0x0403c780 0x1400c001 0x0423c780 0x1000f805 0x0403c780 0x1400c001 0x0423c780
0x1004b003 0x00000280 0x101c8001 0x00000003 0x1004f003 0x00000280 0x101c8001 0x00000003
0x00000005 0xc0000780 0x1000f815 0x0403c780 0x00000005 0xc0000780 0x1000f815 0x0403c780
0x20000a01 0x04004780 0xd409800d 0x20000780 0x20000a01 0x04008780 0xd409800d 0x20000780
0x00020009 0xc0000780 0xd0093811 0x20000780 0x00020009 0xc0000780 0xd0093811 0x20000780
0x20018a15 0x00000003 0x1c00c001 0x0423c780 0x20018a15 0x00000003 0x1c00c001 0x0423c780
0x3005c1fd 0x6c2147cc 0x6800ce0d 0x8020c780 0x3005c1fd 0x6c2147cc 0x6800ce05 0x80204780
0xd4000805 0x20000780 0x1000c001 0x0423c784 0xd4000805 0x20000780 0x1000c001 0x0423c784
0x10040003 0x00000280 0x300109fd 0x6c00c7c8 0x10044003 0x00000280 0x300209fd 0x6c00c7c8
0x30000003 0x00000280 0xd0094805 0x20000780 0x30000003 0x00000280 0xd0094805 0x20000780
0x2502f008 0x20008210 0x1500e000 0x20028204 0x2503f00c 0x20008410 0x1500e000 0x20038408
0x00020805 0xc0000780 0x30000609 0xec000780 0x00020805 0xc0000780 0x30000205 0xec000780
0x30020201 0xc4100780 0x2542ee04 0x2100e800 0x30020401 0xc4100780 0x2541ee04 0x2100e800
0xd00e0005 0xa0c00781 0xd00e0005 0xa0c00781
} }
} }
@@ -510,71 +513,73 @@ code {
} }
} }
code { code {
name = cudaSumResidualChunks name = cudaFindWastedBits
lmem = 0 lmem = 0
smem = 1188 smem = 1248
reg = 8 reg = 5
bar = 1 bar = 1
const { const {
segname = const segname = const
segnum = 1 segnum = 1
offset = 0 offset = 0
bytes = 20 bytes = 24
mem { mem {
0x0000007f 0x0000003f 0x0000001f 0x0000000e 0x0000000f 0x0000007f 0x0000003f 0x0000001f
0x007fffff 0x00000020 0x0000009e
} }
} }
bincode { bincode {
0x10000005 0x0403c780 0xa0004c09 0x04200780 0xa0000005 0x04000780 0x308003fd 0x644107c8
0x1000d001 0x0423c780 0xa0004e0d 0x04200780 0xa0010003 0x00000000 0x30020209 0xc4100780
0x40050015 0x00000780 0x30070619 0xc4100780 0x10010003 0x00000280 0x1000cc01 0x0423c780
0x3006061d 0xc4100780 0xa0000411 0x04000780 0x40014c0d 0x00200780 0x3010060d 0xc4100780
0x60040215 0x00014780 0x20000c19 0x0401c780 0x60004c01 0x0020c780 0x3007000d 0xc4100780
0x30008805 0x00000003 0x30100a15 0xc4100780 0x30060001 0xc4100780 0x20008600 0x2100e800
0x00020205 0xc0000780 0x60040001 0x00014780 0x20000401 0x04000780 0xd00e0001 0x80c00780
0x2000ca05 0x04218780 0xd00e0205 0x80c00780 0x00000405 0xc0000780 0x04021001 0xe4200780
0x04021001 0xe43f0780 0x2140ee14 0x20418a04 0x00000405 0xc0000782 0x04001001 0xe43f0780
0x3001d005 0xac200780 0x300403fd 0x6c00c7c8 0x861ffe03 0x00000000 0x307ccffd 0x6c20c7c8
0x300d0615 0xc4100500 0x20000001 0x04014500 0x1002a003 0x00000280 0xa000420d 0x04200780
0x20000801 0x04000500 0x30020001 0xc4100500 0x1000f811 0x0403c780 0x20000801 0x04004780
0x2000cc01 0x04200500 0xd00e0001 0x80c00500 0x3000cffd 0x6420c7c8 0xa0025003 0x00000000
0x1000f801 0x0403c280 0x301f0015 0xec100780 0x10023003 0x00000280 0xd0084805 0x20000780
0x30010001 0xc4100780 0xd0000a01 0x04008780 0x2504e000 0x20008200 0x30020001 0xc4100780
0x00020805 0xc0000780 0x04001201 0xe4200780 0x2000ca01 0x04200780 0xd00e0001 0x80c00780
0x861ffe03 0x00000000 0x308009fd 0x6c4107c8 0x00000405 0xc0000780 0xd400d001 0x04204780
0xd4044809 0x20000500 0x1800c001 0x0423c500 0x10025003 0x00000780 0x00000405 0xc0000780
0x2400d201 0x04200500 0x04001201 0xe4200500 0x1400d001 0x0423c780 0x20000811 0x0400c782
0x861ffe03 0x00000000 0x308109fd 0x6c4107c8 0x00000405 0xc0000780 0x3004cffd 0x6c2107c8
0xd4024809 0x20000500 0x1800c001 0x0423c500 0x04001001 0xe4200780 0x10017003 0x00000280
0x2400d201 0x04200500 0x04001201 0xe4200500 0x861ffe03 0x00000000 0x308103fd 0x644107c8
0x861ffe03 0x00000000 0x30820801 0x6c40c7d0 0x00000405 0xc0000500 0xd4044009 0x20000500
0xa00001fd 0x0c0147c8 0xd4014809 0x20001680 0x1800c001 0x0423c500 0xd400d001 0x04204500
0x1800c001 0x0423d680 0x2400d201 0x04201680 0x04001001 0xe4200500 0x861ffe03 0x00000000
0x04001201 0xe4201680 0x861ffe03 0x00000000 0x308203fd 0x644107c8 0x00000405 0xc0000500
0x1400f201 0x0423c780 0x2400d201 0x04200780 0xd4024009 0x20000500 0x1800c001 0x0423c500
0x04001201 0xe4200780 0x2400e201 0x04200780 0xd400d001 0x04204500 0x04001001 0xe4200500
0x04001201 0xe4200780 0x2400da01 0x04200780 0x861ffe03 0x00000000 0x308303fd 0x644107c8
0x04001201 0xe4200780 0x2400d601 0x04200780 0x00000405 0xc0000500 0xd4014009 0x20000500
0x04001201 0xe4200780 0x2400d401 0x04200780 0x1800c001 0x0423c500 0xd400d001 0x04204500
0x04001201 0xe4200780 0xa0057003 0x00000000 0x04001001 0xe4200500 0x861ffe03 0x00000000
0x10057003 0x00000100 0x20018801 0x00000003 0x00000405 0xc0000780 0x1400f001 0x0423c780
0x40010415 0x00000780 0x60000615 0x00014780 0xd400d001 0x04204780 0x04001001 0xe4200780
0x30100a19 0xc4100780 0x30830815 0x6c410780 0x1400e001 0x0423c780 0xd400d001 0x04204780
0x3001021d 0xec100780 0x60000401 0x00018780 0x04001001 0xe4200780 0x1400d801 0x0423c780
0xa0000a05 0x2c014780 0x2040d215 0x0421c780 0xd400d001 0x04204780 0x04001001 0xe4200780
0x60840201 0x80400780 0x30040a05 0xec000780 0x1400d401 0x0423c780 0xd400d001 0x04204780
0x20000001 0x04004780 0xd4085009 0x20000780 0x04001001 0xe4200780 0x1400d201 0x0423c780
0x04021201 0xe4200780 0x3800ce01 0xac200780 0xd400d001 0x04204780 0x3001cdfd 0x6420c7c8
0x04021201 0xe4200780 0x3800c601 0xac200780 0x04001001 0xe4200780 0x30000003 0x00000280
0x04021201 0xe4200780 0x3800c201 0xac200780 0x3100f001 0x00000003 0xd000d001 0x042007c0
0x04021201 0xe4200780 0x3800c001 0xac200780 0xa0000001 0x44064680 0x30170001 0xec100680
0x04021201 0xe4200780 0x307c09fd 0x6c0147ca 0x31000009 0x04414680 0x10000809 0x2440c100
0x30000003 0x00000280 0x40074801 0x00200780 0x1000cc01 0x0423c780 0x40014c0d 0x00200780
0x30100001 0xc4100780 0x60064801 0x00200780 0x3010060d 0xc4100780 0x60004c01 0x0020c780
0x20000001 0x04008780 0xd0084805 0x20000780 0x20000001 0x04004780 0x30070005 0xc4100780
0x30020005 0xc4100780 0x1500e000 0x2101e804 0x30060001 0xc4100780 0x20000201 0x04000780
0xd00e0201 0xa0c00781 0x301f8405 0x00000003 0x2000c801 0x04200780
0x307c0205 0x8c000780 0x202c8001 0x00000003
0xd00e0005 0xa0c00781
} }
} }
code { code {
@@ -718,6 +723,27 @@ code {
0xf0000001 0xe0000001 0xf0000001 0xe0000001
} }
} }
code {
name = cudaStereoDecorr
lmem = 0
smem = 24
reg = 6
bar = 0
bincode {
0x10004205 0x0023c780 0xa0000005 0x04000780
0x60014c01 0x00204780 0x3000cbfd 0x6c20c7c8
0x30000003 0x00000280 0x2000ca05 0x04200780
0x30020009 0xc4100780 0x3002020d 0xc4100780
0x2000c805 0x04208780 0xd00e0205 0x80c00780
0x2000c809 0x0420c780 0xd00e0409 0x80c00780
0x3001ca0d 0xc4300780 0x20038010 0x2103ea0c
0x30020815 0xc4100780 0x20028210 0x20038000
0x2000c815 0x04214780 0x3001080d 0xec100780
0x30020011 0xc4100780 0x20400201 0x04008780
0xd00e0a0d 0xa0c00780 0x2000c805 0x04210780
0xd00e0201 0xa0c00781
}
}
code { code {
name = cudaCopyBestMethodStereo name = cudaCopyBestMethodStereo
lmem = 0 lmem = 0
@@ -798,82 +824,3 @@ code {
0xd00e0201 0xa0c00781 0xd00e0201 0xa0c00781
} }
} }
code {
name = cudaSumResidual
lmem = 0
smem = 1244
reg = 7
bar = 1
const {
segname = const
segnum = 1
offset = 0
bytes = 20
mem {
0x0000002f 0x0000001f 0x00000008 0x00000020
0x00000001
}
}
bincode {
0xa0000005 0x04000780 0x308003fd 0x644107c8
0xa000d003 0x00000000 0x30020209 0xc4100780
0x1000d003 0x00000280 0xa0004e01 0x04200780
0x3007000d 0xc4100780 0x30060001 0xc4100780
0x20008600 0x2100e800 0x20000401 0x04000780
0xd00e0001 0x80c00780 0x00000405 0xc0000780
0x04020e01 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x3001cdfd 0x6c20c7c8
0xa001c003 0x00000000 0x1001b003 0x00000280
0x1000cc01 0x0423c780 0x40014e0d 0x00200780
0x3010060d 0xc4100780 0x60004e01 0x0020c780
0x20000001 0x04004780 0x30020001 0xc4100780
0x2000ca01 0x04200780 0xd00e0001 0x80c00780
0x1001c003 0x00000780 0x1000f801 0x0403c780
0x00000405 0xc0000782 0x04000e01 0xe4200780
0x861ffe03 0x00000000 0x308103fd 0x6c4107c8
0x00000405 0xc0000500 0xd4013809 0x20000500
0x1800c001 0x0423c500 0x2400ce01 0x04200500
0x04000e01 0xe4200500 0x861ffe03 0x00000000
0x00000405 0xc0000780 0x1400ee01 0x0423c780
0x2400ce01 0x04200780 0x04000e01 0xe4200780
0x1500fe00 0x2500ee00 0x04000e01 0xe4200780
0x1500f600 0x2500ee00 0x04000e01 0xe4200780
0x1500f200 0x2500ee00 0x04000e01 0xe4200780
0x1500f000 0x2500ee00 0x307c03fd 0x6c0147c8
0x04000e01 0xe4200780 0x30000003 0x00000280
0xd0086005 0x20000780 0x3482c1fd 0x6c6147c8
0x10044003 0x00000280 0xd0083805 0x20000780
0x1500ec00 0x1500e008 0x4005000c 0x1500ee04
0x6004020d 0x0000c780 0x40020211 0x00000780
0x3010060d 0xc4100780 0x60030011 0x00010780
0x60040009 0x0000c780 0x3010080d 0xc4100780
0x2000ce09 0x04208780 0x60020001 0x0000c780
0x20068405 0x00000003 0x10070003 0x00000780
0xd0086005 0x20000780 0x3483c1fd 0x6c6147c8
0x1005c003 0x00000280 0xd0083805 0x20000780
0x1500ec00 0x1500e008 0x2400c60d 0x04200780
0x3002cc15 0xc4300780 0x40070805 0x00000780
0x301f0a19 0xec100780 0x60060a11 0x00004780
0x1400ce05 0x0423c780 0xd0840c19 0x04400780
0x30100811 0xc4100780 0x20000c15 0x04014780
0x60060809 0x00010780 0x40010411 0x00000780
0x30010a0d 0xec100780 0x60000611 0x00010780
0x20000409 0x0400c780 0x3010080d 0xc4100780
0x2000ce09 0x04208780 0x60000401 0x0000c780
0x200f8405 0x00000003 0x10070003 0x00000780
0xd0086005 0x20000780 0x1500e400 0x1500e204
0x40030009 0x00000780 0x60020209 0x00008780
0x30100409 0xc4100780 0x347cc1fd 0x6c2147c8
0x60020001 0x00008780 0x1006f003 0x00000280
0xd0086805 0x20000780 0x2501e209 0x00000003
0x1400c005 0x0423c780 0x4005040d 0x00000780
0x6004060d 0x0000c780 0x3010060d 0xc4100780
0x60040409 0x0000c780 0x307ccffd 0x6c2147c8
0x1400c005 0x0423c780 0x10000405 0x0403c280
0x10070003 0x00000780 0x10000005 0x0403c780
0xa0004e09 0x04200780 0x3007040d 0xc4100780
0x30060409 0xc4100780 0x20028608 0x2102e808
0x30000201 0xac000780 0x20108405 0x00000003
0xd00e0201 0xa0c00781
}
}