mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
optimizations
This commit is contained in:
@@ -851,7 +851,6 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
unsafe void initialize_autocorTasks(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
unsafe void initialize_autocorTasks(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
||||||
{
|
{
|
||||||
computeAutocorTaskStruct* autocorTasks = (computeAutocorTaskStruct*)task.autocorTasksPtr;
|
computeAutocorTaskStruct* autocorTasks = (computeAutocorTaskStruct*)task.autocorTasksPtr;
|
||||||
encodeResidualTaskStruct* residualTasks = (encodeResidualTaskStruct*)task.residualTasksPtr;
|
|
||||||
nAutocorTasks = 0;
|
nAutocorTasks = 0;
|
||||||
nResidualTasks = 0;
|
nResidualTasks = 0;
|
||||||
for (int iFrame = 0; iFrame < nFrames; iFrame++)
|
for (int iFrame = 0; iFrame < nFrames; iFrame++)
|
||||||
@@ -869,40 +868,44 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
// LPC tasks
|
// LPC tasks
|
||||||
for (int order = 1; order <= max_order; order++)
|
for (int order = 1; order <= max_order; order++)
|
||||||
{
|
{
|
||||||
residualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0;
|
task.ResidualTasks[nResidualTasks].type = (int)SubframeType.LPC;
|
||||||
residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
|
task.ResidualTasks[nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
|
||||||
|
task.ResidualTasks[nResidualTasks].blocksize = blocksize;
|
||||||
|
task.ResidualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0;
|
||||||
|
task.ResidualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
|
||||||
nResidualTasks++;
|
nResidualTasks++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fixed prediction
|
// Fixed prediction
|
||||||
for (int order = 1; order <= max_order; order++)
|
for (int order = 1; order <= max_order; order++)
|
||||||
{
|
{
|
||||||
residualTasks[nResidualTasks].residualOrder = order <= 4 ? order : 0;
|
task.ResidualTasks[nResidualTasks].type = order <= 5 ? (int)SubframeType.Fixed : (int)SubframeType.Verbatim;
|
||||||
residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
|
task.ResidualTasks[nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
|
||||||
residualTasks[nResidualTasks].shift = 0;
|
task.ResidualTasks[nResidualTasks].blocksize = blocksize;
|
||||||
|
task.ResidualTasks[nResidualTasks].residualOrder = order <= 4 ? order : 0;
|
||||||
|
task.ResidualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
|
||||||
|
task.ResidualTasks[nResidualTasks].shift = 0;
|
||||||
switch (order)
|
switch (order)
|
||||||
{
|
{
|
||||||
case 5:
|
case 5:
|
||||||
residualTasks[nResidualTasks].residualOrder = 1;
|
|
||||||
residualTasks[nResidualTasks].coefs[0] = 0;
|
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
residualTasks[nResidualTasks].coefs[0] = 1;
|
task.ResidualTasks[nResidualTasks].coefs[0] = 1;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
residualTasks[nResidualTasks].coefs[1] = 2;
|
task.ResidualTasks[nResidualTasks].coefs[1] = 2;
|
||||||
residualTasks[nResidualTasks].coefs[0] = -1;
|
task.ResidualTasks[nResidualTasks].coefs[0] = -1;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
residualTasks[nResidualTasks].coefs[2] = 3;
|
task.ResidualTasks[nResidualTasks].coefs[2] = 3;
|
||||||
residualTasks[nResidualTasks].coefs[1] = -3;
|
task.ResidualTasks[nResidualTasks].coefs[1] = -3;
|
||||||
residualTasks[nResidualTasks].coefs[0] = 1;
|
task.ResidualTasks[nResidualTasks].coefs[0] = 1;
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
residualTasks[nResidualTasks].coefs[3] = 4;
|
task.ResidualTasks[nResidualTasks].coefs[3] = 4;
|
||||||
residualTasks[nResidualTasks].coefs[2] = -6;
|
task.ResidualTasks[nResidualTasks].coefs[2] = -6;
|
||||||
residualTasks[nResidualTasks].coefs[1] = 4;
|
task.ResidualTasks[nResidualTasks].coefs[1] = 4;
|
||||||
residualTasks[nResidualTasks].coefs[0] = -1;
|
task.ResidualTasks[nResidualTasks].coefs[0] = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nResidualTasks++;
|
nResidualTasks++;
|
||||||
@@ -970,7 +973,6 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
|
|
||||||
unsafe void select_best_methods(FlacFrame frame, int channelsCount, int max_order, int iFrame, FlaCudaTask task)
|
unsafe void select_best_methods(FlacFrame frame, int channelsCount, int max_order, int iFrame, FlaCudaTask task)
|
||||||
{
|
{
|
||||||
encodeResidualTaskStruct* residualTasks = (encodeResidualTaskStruct*)task.residualTasksPtr;
|
|
||||||
for (int ch = 0; ch < channelsCount; ch++)
|
for (int ch = 0; ch < channelsCount; ch++)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -994,51 +996,59 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
if (frame.blocksize <= 4)
|
if (frame.blocksize <= 4)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// LPC
|
|
||||||
for (int ch = 0; ch < channelsCount; ch++)
|
for (int ch = 0; ch < channelsCount; ch++)
|
||||||
{
|
{
|
||||||
for (int iWindow = 0; iWindow < _windowcount; iWindow++)
|
int index = ch + iFrame * channelsCount;
|
||||||
|
if (frame.subframes[ch].best.size > task.BestResidualTasks[index].size)
|
||||||
{
|
{
|
||||||
for (int order = 1; order <= max_order && order < frame.blocksize; order++)
|
frame.subframes[ch].best.type = (SubframeType)task.BestResidualTasks[index].type;
|
||||||
{
|
frame.subframes[ch].best.size = (uint)task.BestResidualTasks[index].size;
|
||||||
int index = (order - 1) + max_order * (iWindow + (_windowcount + 1) * (ch + iFrame * channelsCount));
|
frame.subframes[ch].best.order = task.BestResidualTasks[index].residualOrder;
|
||||||
int cbits = residualTasks[index].cbits;
|
frame.subframes[ch].best.cbits = task.BestResidualTasks[index].cbits;
|
||||||
int nbits = order * (int)frame.subframes[ch].obits + 4 + 5 + order * cbits + 6 + residualTasks[index].size;
|
frame.subframes[ch].best.shift = task.BestResidualTasks[index].shift;
|
||||||
if (residualTasks[index].residualOrder != order)
|
for (int i = 0; i < task.BestResidualTasks[index].residualOrder; i++)
|
||||||
throw new Exception("oops");
|
frame.subframes[ch].best.coefs[i] = task.BestResidualTasks[index].coefs[task.BestResidualTasks[index].residualOrder - 1 - i];
|
||||||
if (frame.subframes[ch].best.size > nbits)
|
AudioSamples.MemCpy(frame.subframes[ch].best.residual + frame.subframes[ch].best.order, (int*)task.residualBufferPtr + task.BestResidualTasks[index].samplesOffs, frame.blocksize - frame.subframes[ch].best.order);
|
||||||
{
|
|
||||||
frame.subframes[ch].best.type = SubframeType.LPC;
|
|
||||||
frame.subframes[ch].best.size = (uint)nbits;
|
|
||||||
frame.subframes[ch].best.order = order;
|
|
||||||
frame.subframes[ch].best.window = iWindow;
|
|
||||||
frame.subframes[ch].best.cbits = cbits;
|
|
||||||
frame.subframes[ch].best.shift = residualTasks[index].shift;
|
|
||||||
for (int i = 0; i < order; i++)
|
|
||||||
frame.subframes[ch].best.coefs[i] = residualTasks[index].coefs[order - 1 - i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
//for (int iWindow = 0; iWindow < _windowcount; iWindow++)
|
||||||
|
//{
|
||||||
|
// for (int order = 1; order <= max_order && order < frame.blocksize; order++)
|
||||||
|
// {
|
||||||
|
// int index = (order - 1) + max_order * (iWindow + (_windowcount + 1) * (ch + iFrame * channelsCount));
|
||||||
|
// if (task.ResidualTasks[index].residualOrder != order || task.ResidualTasks[index].type != (int)SubframeType.LPC)
|
||||||
|
// throw new Exception("oops");
|
||||||
|
// if (frame.subframes[ch].best.size > task.ResidualTasks[index].size)
|
||||||
|
// {
|
||||||
|
// frame.subframes[ch].best.type = SubframeType.LPC;
|
||||||
|
// frame.subframes[ch].best.size = (uint)task.ResidualTasks[index].size;
|
||||||
|
// frame.subframes[ch].best.order = task.ResidualTasks[index].residualOrder;
|
||||||
|
// //frame.subframes[ch].best.window = iWindow;
|
||||||
|
// frame.subframes[ch].best.cbits = task.ResidualTasks[index].cbits;
|
||||||
|
// frame.subframes[ch].best.shift = task.ResidualTasks[index].shift;
|
||||||
|
// for (int i = 0; i < order; i++)
|
||||||
|
// frame.subframes[ch].best.coefs[i] = task.ResidualTasks[index].coefs[order - 1 - i];
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXED
|
// FIXED
|
||||||
for (int ch = 0; ch < channelsCount; ch++)
|
//for (int ch = 0; ch < channelsCount; ch++)
|
||||||
{
|
//{
|
||||||
for (int order = 1; order <= 5 && order <= max_order && order < frame.blocksize; order++)
|
// for (int order = 1; order <= 5 && order <= max_order && order < frame.blocksize; order++)
|
||||||
{
|
// {
|
||||||
int index = (order - 1) + max_order * (_windowcount + (_windowcount + 1) * (ch + iFrame * channelsCount));
|
// int index = (order - 1) + max_order * (_windowcount + (_windowcount + 1) * (ch + iFrame * channelsCount));
|
||||||
int forder = order == 5 ? 0 : order;
|
// int forder = order == 5 ? 0 : order;
|
||||||
int nbits = forder * (int)frame.subframes[ch].obits + 6 + residualTasks[index].size;
|
// if (task.ResidualTasks[index].residualOrder != (order == 5 ? 1 : order))
|
||||||
if (residualTasks[index].residualOrder != (order == 5 ? 1 : order))
|
// throw new Exception("oops");
|
||||||
throw new Exception("oops");
|
// if (frame.subframes[ch].best.size > task.ResidualTasks[index].size)
|
||||||
if (frame.subframes[ch].best.size > nbits)
|
// {
|
||||||
{
|
// frame.subframes[ch].best.type = SubframeType.Fixed;
|
||||||
frame.subframes[ch].best.type = SubframeType.Fixed;
|
// frame.subframes[ch].best.size = (uint)task.ResidualTasks[index].size;
|
||||||
frame.subframes[ch].best.size = (uint)nbits;
|
// frame.subframes[ch].best.order = forder;
|
||||||
frame.subframes[ch].best.order = forder;
|
// }
|
||||||
}
|
// }
|
||||||
}
|
//}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe void estimate_residual(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
unsafe void estimate_residual(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
||||||
@@ -1063,7 +1073,7 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
threads_y = 4;
|
threads_y = 4;
|
||||||
else
|
else
|
||||||
throw new Exception("invalid LPC order");
|
throw new Exception("invalid LPC order");
|
||||||
int partSize = 32 * (threads_y - 1);
|
int partSize = 32 * threads_y;
|
||||||
int partCount = (blocksize + partSize - 1) / partSize;
|
int partCount = (blocksize + partSize - 1) / partSize;
|
||||||
|
|
||||||
if (partCount > maxResidualParts)
|
if (partCount > maxResidualParts)
|
||||||
@@ -1085,10 +1095,27 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
cuda.SetParameterSize(task.cudaSumResidual, sizeof(uint) * 4U);
|
cuda.SetParameterSize(task.cudaSumResidual, sizeof(uint) * 4U);
|
||||||
cuda.SetFunctionBlockShape(task.cudaSumResidual, 64, 1, 1);
|
cuda.SetFunctionBlockShape(task.cudaSumResidual, 64, 1, 1);
|
||||||
|
|
||||||
|
int tasksPerChannel = (_windowcount + 1) * max_order;
|
||||||
|
int nBestTasks = nResidualTasks / tasksPerChannel;
|
||||||
|
cuda.SetParameter(task.cudaChooseBestResidual, 0, (uint)task.cudaBestResidualTasks.Pointer);
|
||||||
|
cuda.SetParameter(task.cudaChooseBestResidual, 1 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
|
||||||
|
cuda.SetParameter(task.cudaChooseBestResidual, 2 * sizeof(uint), (uint)tasksPerChannel);
|
||||||
|
cuda.SetParameterSize(task.cudaChooseBestResidual, sizeof(uint) * 3U);
|
||||||
|
cuda.SetFunctionBlockShape(task.cudaChooseBestResidual, 256, 1, 1);
|
||||||
|
|
||||||
|
cuda.SetParameter(task.cudaEncodeResidual, 0, (uint)task.cudaResidual.Pointer);
|
||||||
|
cuda.SetParameter(task.cudaEncodeResidual, 1 * sizeof(uint), (uint)task.cudaSamples.Pointer);
|
||||||
|
cuda.SetParameter(task.cudaEncodeResidual, 2 * sizeof(uint), (uint)task.cudaBestResidualTasks.Pointer);
|
||||||
|
cuda.SetParameterSize(task.cudaEncodeResidual, sizeof(uint) * 3U);
|
||||||
|
cuda.SetFunctionBlockShape(task.cudaEncodeResidual, partSize, 1, 1);
|
||||||
|
|
||||||
// issue work to the GPU
|
// issue work to the GPU
|
||||||
cuda.LaunchAsync(task.cudaEstimateResidual, partCount, (nResidualTasks / threads_y * nFrames) / maxFrames, task.stream);
|
cuda.LaunchAsync(task.cudaEstimateResidual, partCount, (nResidualTasks / threads_y * nFrames) / maxFrames, task.stream);
|
||||||
cuda.LaunchAsync(task.cudaSumResidual, 1, (nResidualTasks * nFrames) / maxFrames, task.stream);
|
cuda.LaunchAsync(task.cudaSumResidual, 1, (nResidualTasks * nFrames) / maxFrames, task.stream);
|
||||||
cuda.CopyDeviceToHostAsync(task.cudaResidualTasks, task.residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * ((nResidualTasks * nFrames) / maxFrames)), task.stream);
|
cuda.LaunchAsync(task.cudaChooseBestResidual, 1, (nBestTasks * nFrames) / maxFrames, task.stream);
|
||||||
|
//cuda.LaunchAsync(task.cudaEncodeResidual, partCount, (nBestTasks * nFrames) / maxFrames, task.stream);
|
||||||
|
cuda.CopyDeviceToHostAsync(task.cudaBestResidualTasks, task.bestResidualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * ((nBestTasks * nFrames) / maxFrames)), task.stream);
|
||||||
|
//cuda.CopyDeviceToHostAsync(task.cudaResidual, task.residualBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelsCount), task.stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe void compute_autocorellation(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
unsafe void compute_autocorellation(int blocksize, int channelsCount, int max_order, int nFrames, FlaCudaTask task)
|
||||||
@@ -1762,7 +1789,10 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
public int shift;
|
public int shift;
|
||||||
public int cbits;
|
public int cbits;
|
||||||
public int size;
|
public int size;
|
||||||
public fixed int reserved[11];
|
public int type;
|
||||||
|
public int obits;
|
||||||
|
public int blocksize;
|
||||||
|
public fixed int reserved[8];
|
||||||
public fixed int coefs[32];
|
public fixed int coefs[32];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1772,23 +1802,29 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
public CUfunction cudaComputeAutocor;
|
public CUfunction cudaComputeAutocor;
|
||||||
public CUfunction cudaComputeLPC;
|
public CUfunction cudaComputeLPC;
|
||||||
public CUfunction cudaEstimateResidual;
|
public CUfunction cudaEstimateResidual;
|
||||||
|
public CUfunction cudaChooseBestResidual;
|
||||||
//public CUfunction cudaSumResidualChunks;
|
//public CUfunction cudaSumResidualChunks;
|
||||||
public CUfunction cudaSumResidual;
|
public CUfunction cudaSumResidual;
|
||||||
//public CUfunction cudaEncodeResidual;
|
public CUfunction cudaEncodeResidual;
|
||||||
public CUdeviceptr cudaSamples;
|
public CUdeviceptr cudaSamples;
|
||||||
|
public CUdeviceptr cudaResidual;
|
||||||
public CUdeviceptr cudaAutocorTasks;
|
public CUdeviceptr cudaAutocorTasks;
|
||||||
public CUdeviceptr cudaAutocorOutput;
|
public CUdeviceptr cudaAutocorOutput;
|
||||||
public CUdeviceptr cudaResidualTasks;
|
public CUdeviceptr cudaResidualTasks;
|
||||||
public CUdeviceptr cudaResidualOutput;
|
public CUdeviceptr cudaResidualOutput;
|
||||||
|
public CUdeviceptr cudaBestResidualTasks;
|
||||||
public IntPtr samplesBufferPtr = IntPtr.Zero;
|
public IntPtr samplesBufferPtr = IntPtr.Zero;
|
||||||
|
public IntPtr residualBufferPtr = IntPtr.Zero;
|
||||||
public IntPtr autocorTasksPtr = IntPtr.Zero;
|
public IntPtr autocorTasksPtr = IntPtr.Zero;
|
||||||
public IntPtr residualTasksPtr = IntPtr.Zero;
|
public IntPtr residualTasksPtr = IntPtr.Zero;
|
||||||
|
public IntPtr bestResidualTasksPtr = IntPtr.Zero;
|
||||||
public CUstream stream;
|
public CUstream stream;
|
||||||
public int[] verifyBuffer;
|
public int[] verifyBuffer;
|
||||||
public int blocksize = 0;
|
public int blocksize = 0;
|
||||||
public FlacFrame frame;
|
public FlacFrame frame;
|
||||||
public int autocorTasksLen;
|
public int autocorTasksLen;
|
||||||
public int residualTasksLen;
|
public int residualTasksLen;
|
||||||
|
public int bestResidualTasksLen;
|
||||||
public int samplesBufferLen;
|
public int samplesBufferLen;
|
||||||
|
|
||||||
unsafe public FlaCudaTask(CUDA _cuda, int channelCount)
|
unsafe public FlaCudaTask(CUDA _cuda, int channelCount)
|
||||||
@@ -1797,23 +1833,32 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
|
|
||||||
autocorTasksLen = sizeof(computeAutocorTaskStruct) * channelCount * lpc.MAX_LPC_WINDOWS * FlaCudaWriter.maxFrames;
|
autocorTasksLen = sizeof(computeAutocorTaskStruct) * channelCount * lpc.MAX_LPC_WINDOWS * FlaCudaWriter.maxFrames;
|
||||||
residualTasksLen = sizeof(encodeResidualTaskStruct) * channelCount * lpc.MAX_LPC_ORDER * (lpc.MAX_LPC_WINDOWS + 1) * FlaCudaWriter.maxFrames;
|
residualTasksLen = sizeof(encodeResidualTaskStruct) * channelCount * lpc.MAX_LPC_ORDER * (lpc.MAX_LPC_WINDOWS + 1) * FlaCudaWriter.maxFrames;
|
||||||
|
bestResidualTasksLen = sizeof(encodeResidualTaskStruct) * channelCount * FlaCudaWriter.maxFrames;
|
||||||
samplesBufferLen = sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelCount;
|
samplesBufferLen = sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelCount;
|
||||||
|
|
||||||
cudaSamples = cuda.Allocate((uint)samplesBufferLen);
|
cudaSamples = cuda.Allocate((uint)samplesBufferLen);
|
||||||
|
cudaResidual = cuda.Allocate((uint)samplesBufferLen);
|
||||||
cudaAutocorTasks = cuda.Allocate((uint)autocorTasksLen);
|
cudaAutocorTasks = cuda.Allocate((uint)autocorTasksLen);
|
||||||
cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * channelCount * lpc.MAX_LPC_WINDOWS * (lpc.MAX_LPC_ORDER + 1) * FlaCudaWriter.maxAutocorParts));
|
cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * channelCount * lpc.MAX_LPC_WINDOWS * (lpc.MAX_LPC_ORDER + 1) * FlaCudaWriter.maxAutocorParts));
|
||||||
cudaResidualTasks = cuda.Allocate((uint)residualTasksLen);
|
cudaResidualTasks = cuda.Allocate((uint)residualTasksLen);
|
||||||
|
cudaBestResidualTasks = cuda.Allocate((uint)bestResidualTasksLen);
|
||||||
cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * channelCount * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER * FlaCudaWriter.maxResidualParts));
|
cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * channelCount * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER * FlaCudaWriter.maxResidualParts));
|
||||||
CUResult cuErr = CUDADriver.cuMemAllocHost(ref samplesBufferPtr, (uint)samplesBufferLen);
|
CUResult cuErr = CUDADriver.cuMemAllocHost(ref samplesBufferPtr, (uint)samplesBufferLen);
|
||||||
|
if (cuErr == CUResult.Success)
|
||||||
|
cuErr = CUDADriver.cuMemAllocHost(ref residualBufferPtr, (uint)samplesBufferLen);
|
||||||
if (cuErr == CUResult.Success)
|
if (cuErr == CUResult.Success)
|
||||||
cuErr = CUDADriver.cuMemAllocHost(ref autocorTasksPtr, (uint)autocorTasksLen);
|
cuErr = CUDADriver.cuMemAllocHost(ref autocorTasksPtr, (uint)autocorTasksLen);
|
||||||
if (cuErr == CUResult.Success)
|
if (cuErr == CUResult.Success)
|
||||||
cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)residualTasksLen);
|
cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)residualTasksLen);
|
||||||
|
if (cuErr == CUResult.Success)
|
||||||
|
cuErr = CUDADriver.cuMemAllocHost(ref bestResidualTasksPtr, (uint)bestResidualTasksLen);
|
||||||
if (cuErr != CUResult.Success)
|
if (cuErr != CUResult.Success)
|
||||||
{
|
{
|
||||||
if (samplesBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(samplesBufferPtr); samplesBufferPtr = IntPtr.Zero;
|
if (samplesBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(samplesBufferPtr); samplesBufferPtr = IntPtr.Zero;
|
||||||
|
if (residualBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(residualBufferPtr); residualBufferPtr = IntPtr.Zero;
|
||||||
if (autocorTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(autocorTasksPtr); autocorTasksPtr = IntPtr.Zero;
|
if (autocorTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(autocorTasksPtr); autocorTasksPtr = IntPtr.Zero;
|
||||||
if (residualTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(residualTasksPtr); residualTasksPtr = IntPtr.Zero;
|
if (residualTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(residualTasksPtr); residualTasksPtr = IntPtr.Zero;
|
||||||
|
if (bestResidualTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(bestResidualTasksPtr); bestResidualTasksPtr = IntPtr.Zero;
|
||||||
throw new CUDAException(cuErr);
|
throw new CUDAException(cuErr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1821,8 +1866,9 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC");
|
cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC");
|
||||||
cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual");
|
cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual");
|
||||||
cudaSumResidual = cuda.GetModuleFunction("cudaSumResidual");
|
cudaSumResidual = cuda.GetModuleFunction("cudaSumResidual");
|
||||||
|
cudaChooseBestResidual = cuda.GetModuleFunction("cudaChooseBestResidual");
|
||||||
|
cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual");
|
||||||
//cudaSumResidualChunks = cuda.GetModuleFunction("cudaSumResidualChunks");
|
//cudaSumResidualChunks = cuda.GetModuleFunction("cudaSumResidualChunks");
|
||||||
//cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual");
|
|
||||||
|
|
||||||
stream = cuda.CreateStream();
|
stream = cuda.CreateStream();
|
||||||
verifyBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * channelCount]; // should be channels, not channelCount. And should null if not doing verify!
|
verifyBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * channelCount]; // should be channels, not channelCount. And should null if not doing verify!
|
||||||
@@ -1832,14 +1878,34 @@ namespace CUETools.Codecs.FlaCuda
|
|||||||
public void Dispose()
|
public void Dispose()
|
||||||
{
|
{
|
||||||
cuda.Free(cudaSamples);
|
cuda.Free(cudaSamples);
|
||||||
|
cuda.Free(cudaResidual);
|
||||||
cuda.Free(cudaAutocorTasks);
|
cuda.Free(cudaAutocorTasks);
|
||||||
cuda.Free(cudaAutocorOutput);
|
cuda.Free(cudaAutocorOutput);
|
||||||
cuda.Free(cudaResidualTasks);
|
cuda.Free(cudaResidualTasks);
|
||||||
cuda.Free(cudaResidualOutput);
|
cuda.Free(cudaResidualOutput);
|
||||||
|
cuda.Free(cudaBestResidualTasks);
|
||||||
CUDADriver.cuMemFreeHost(samplesBufferPtr);
|
CUDADriver.cuMemFreeHost(samplesBufferPtr);
|
||||||
|
CUDADriver.cuMemFreeHost(residualBufferPtr);
|
||||||
CUDADriver.cuMemFreeHost(residualTasksPtr);
|
CUDADriver.cuMemFreeHost(residualTasksPtr);
|
||||||
|
CUDADriver.cuMemFreeHost(bestResidualTasksPtr);
|
||||||
CUDADriver.cuMemFreeHost(autocorTasksPtr);
|
CUDADriver.cuMemFreeHost(autocorTasksPtr);
|
||||||
cuda.DestroyStream(stream);
|
cuda.DestroyStream(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public unsafe encodeResidualTaskStruct* ResidualTasks
|
||||||
|
{
|
||||||
|
get
|
||||||
|
{
|
||||||
|
return (encodeResidualTaskStruct*)residualTasksPtr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public unsafe encodeResidualTaskStruct* BestResidualTasks
|
||||||
|
{
|
||||||
|
get
|
||||||
|
{
|
||||||
|
return (encodeResidualTaskStruct*)bestResidualTasksPtr;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,6 +28,14 @@ typedef struct
|
|||||||
int blocksize;
|
int blocksize;
|
||||||
} computeAutocorTaskStruct;
|
} computeAutocorTaskStruct;
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
Constant = 0,
|
||||||
|
Verbatim = 1,
|
||||||
|
Fixed = 8,
|
||||||
|
LPC = 32
|
||||||
|
} SubframeType;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int residualOrder; // <= 32
|
int residualOrder; // <= 32
|
||||||
@@ -35,7 +43,10 @@ typedef struct
|
|||||||
int shift;
|
int shift;
|
||||||
int cbits;
|
int cbits;
|
||||||
int size;
|
int size;
|
||||||
int reserved[11];
|
int type;
|
||||||
|
int obits;
|
||||||
|
int blocksize;
|
||||||
|
int reserved[8];
|
||||||
int coefs[32];
|
int coefs[32];
|
||||||
} encodeResidualTaskStruct;
|
} encodeResidualTaskStruct;
|
||||||
|
|
||||||
@@ -161,11 +172,11 @@ extern "C" __global__ void cudaComputeLPC(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Levinson-Durbin recursion
|
// Levinson-Durbin recursion
|
||||||
shared.ldr[tid] += (tid < order) * __fmul_rz(reff, shared.ldr[order - 1 - tid]) + (tid == order) * reff;
|
shared.ldr[tid] += (tid < order) * __fmul_rz(reff, shared.ldr[order - 1 - tid]) + (tid == order) * reff;
|
||||||
|
|
||||||
// Quantization
|
// Quantization
|
||||||
int precision = 13 - (order > 8);
|
int precision = 13 - (order > 8);
|
||||||
int taskNo = shared.task.residualOffs + order;
|
int taskNo = shared.task.residualOffs + order;
|
||||||
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.ldr[tid]) * (1 << 15))) - precision), tid <= order);
|
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.ldr[tid]) * (1 << 15))) - precision), tid <= order);
|
||||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
|
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
|
||||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]);
|
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]);
|
||||||
@@ -189,8 +200,8 @@ extern "C" __global__ void cudaComputeLPC(
|
|||||||
int cbits = shared.bits[0];
|
int cbits = shared.bits[0];
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
output[taskNo].cbits = cbits;
|
output[taskNo].cbits = cbits;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// blockDim.x == 32
|
// blockDim.x == 32
|
||||||
@@ -201,11 +212,11 @@ extern "C" __global__ void cudaEstimateResidual(
|
|||||||
encodeResidualTaskStruct *tasks,
|
encodeResidualTaskStruct *tasks,
|
||||||
int max_order,
|
int max_order,
|
||||||
int frameSize,
|
int frameSize,
|
||||||
int partSize // should be 224
|
int partSize // should be blockDim.x * blockDim.y == 256
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
__shared__ struct {
|
__shared__ struct {
|
||||||
int data[32*8];
|
int data[32*9];
|
||||||
volatile int residual[32*8];
|
volatile int residual[32*8];
|
||||||
encodeResidualTaskStruct task[8];
|
encodeResidualTaskStruct task[8];
|
||||||
} shared;
|
} shared;
|
||||||
@@ -218,22 +229,24 @@ extern "C" __global__ void cudaEstimateResidual(
|
|||||||
|
|
||||||
// fetch samples
|
// fetch samples
|
||||||
shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] : 0;
|
shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] : 0;
|
||||||
const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize)) * (shared.task[threadIdx.y].residualOrder != 0);
|
if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task[0].samplesOffs + pos + tid + partSize] : 0;
|
||||||
|
const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize));
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
shared.residual[tid] = 0;
|
shared.residual[tid] = 0;
|
||||||
shared.task[threadIdx.y].coefs[threadIdx.x] = threadIdx.x < max_order ? tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0;
|
shared.task[threadIdx.y].coefs[threadIdx.x] = threadIdx.x < max_order ? tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0;
|
||||||
|
|
||||||
for (int i = threadIdx.x; i - threadIdx.x < residualLen; i += blockDim.x) // += 32
|
for (int i = blockDim.y * (shared.task[threadIdx.y].type == Verbatim); i < blockDim.y; i++) // += 32
|
||||||
{
|
{
|
||||||
|
int ptr = threadIdx.x + (i<<5);
|
||||||
// compute residual
|
// compute residual
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
int c = 0;
|
int c = 0;
|
||||||
for (c = 0; c < shared.task[threadIdx.y].residualOrder; c++)
|
for (c = 0; c < shared.task[threadIdx.y].residualOrder; c++)
|
||||||
sum += __mul24(shared.data[i + c], shared.task[threadIdx.y].coefs[c]);
|
sum += __mul24(shared.data[ptr + c], shared.task[threadIdx.y].coefs[c]);
|
||||||
sum = shared.data[i + c] - (sum >> shared.task[threadIdx.y].shift);
|
sum = shared.data[ptr + c] - (sum >> shared.task[threadIdx.y].shift);
|
||||||
shared.residual[tid] += __mul24(i < residualLen, (sum << 1) ^ (sum >> 31));
|
shared.residual[tid] += __mul24(ptr < residualLen, (sum << 1) ^ (sum >> 31));
|
||||||
}
|
}
|
||||||
|
|
||||||
// enable this line when using blockDim.x == 64
|
// enable this line when using blockDim.x == 64
|
||||||
@@ -250,7 +263,7 @@ extern "C" __global__ void cudaEstimateResidual(
|
|||||||
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 4]);
|
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 4]);
|
||||||
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 2]);
|
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 2]);
|
||||||
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 1]);
|
shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 1]);
|
||||||
if (threadIdx.x == 0 && shared.task[threadIdx.y].residualOrder != 0)
|
if (threadIdx.x == 0)
|
||||||
output[(blockIdx.y * blockDim.y + threadIdx.y) * gridDim.x + blockIdx.x] = shared.residual[tid];
|
output[(blockIdx.y * blockDim.y + threadIdx.y) * gridDim.x + blockIdx.x] = shared.residual[tid];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -342,17 +355,79 @@ extern "C" __global__ void cudaSumResidual(
|
|||||||
shared.partLen[tid] += shared.partLen[tid + 1];
|
shared.partLen[tid] += shared.partLen[tid + 1];
|
||||||
// return sum
|
// return sum
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
tasks[blockIdx.y].size = shared.partLen[0];
|
tasks[blockIdx.y].size = shared.task.type == Fixed ?
|
||||||
|
shared.task.residualOrder * shared.task.obits + 6 + shared.partLen[0] : shared.task.type == LPC ?
|
||||||
|
shared.task.residualOrder * shared.task.obits + 4 + 5 + shared.task.residualOrder * shared.task.cbits + 6 + (4 * partCount/2)/* << porder */ + shared.partLen[0] :
|
||||||
|
shared.task.obits * shared.task.blocksize;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define BEST_INDEX(a,b) ((a) + ((b) - (a)) * (shared.length[b] < shared.length[a]))
|
||||||
|
|
||||||
|
extern "C" __global__ void cudaChooseBestResidual(
|
||||||
|
encodeResidualTaskStruct *tasks_out,
|
||||||
|
encodeResidualTaskStruct *tasks,
|
||||||
|
int count
|
||||||
|
)
|
||||||
|
{
|
||||||
|
__shared__ struct {
|
||||||
|
volatile int index[128];
|
||||||
|
int length[256];
|
||||||
|
} shared;
|
||||||
|
|
||||||
|
//shared.index[threadIdx.x] = threadIdx.x;
|
||||||
|
shared.length[threadIdx.x] = (threadIdx.x < count) ? tasks[threadIdx.x + count * blockIdx.y].size : 0x7fffffff;
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
//if (threadIdx.x < 128) shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 128]); __syncthreads();
|
||||||
|
if (threadIdx.x < 128) shared.index[threadIdx.x] = BEST_INDEX(threadIdx.x, threadIdx.x + 128); __syncthreads();
|
||||||
|
if (threadIdx.x < 64) shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 64]); __syncthreads();
|
||||||
|
if (threadIdx.x < 32)
|
||||||
|
{
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 32]);
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 16]);
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 8]);
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 4]);
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 2]);
|
||||||
|
shared.index[threadIdx.x] = BEST_INDEX(shared.index[threadIdx.x], shared.index[threadIdx.x + 1]);
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
if (threadIdx.x < sizeof(encodeResidualTaskStruct)/sizeof(int))
|
||||||
|
((int*)(tasks_out + blockIdx.y))[threadIdx.x] = ((int*)(tasks + count * blockIdx.y + shared.index[0]))[threadIdx.x];
|
||||||
|
// if (threadIdx.x == 0)
|
||||||
|
//tasks[count * blockIdx.y].best = count * blockIdx.y + shared.index[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" __global__ void cudaEncodeResidual(
|
extern "C" __global__ void cudaEncodeResidual(
|
||||||
int*output,
|
int*output,
|
||||||
int*samples,
|
int*samples,
|
||||||
encodeResidualTaskStruct *tasks,
|
encodeResidualTaskStruct *tasks
|
||||||
int frameSize,
|
|
||||||
int partSize // should be <= blockDim - max_order
|
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
__shared__ struct {
|
||||||
|
int data[256 + 32];
|
||||||
|
encodeResidualTaskStruct task;
|
||||||
|
} shared;
|
||||||
|
const int tid = threadIdx.x;
|
||||||
|
if (threadIdx.x < sizeof(encodeResidualTaskStruct))
|
||||||
|
((int*)&shared.task)[threadIdx.x] = ((int*)(&tasks[blockIdx.y]))[threadIdx.x];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
const int partSize = blockDim.x;
|
||||||
|
const int pos = blockIdx.x * partSize;
|
||||||
|
const int dataLen = min(shared.task.blocksize - pos, partSize + shared.task.residualOrder);
|
||||||
|
|
||||||
|
// fetch samples
|
||||||
|
shared.data[tid] = tid < dataLen ? samples[shared.task.samplesOffs + pos + tid] : 0;
|
||||||
|
if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task.samplesOffs + pos + tid + partSize] : 0;
|
||||||
|
const int residualLen = max(0,min(shared.task.blocksize - pos - shared.task.residualOrder, partSize));
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
// compute residual
|
||||||
|
int sum = 0;
|
||||||
|
for (int c = 0; c < shared.task.residualOrder; c++)
|
||||||
|
sum += __mul24(shared.data[tid + c], shared.task.coefs[c]);
|
||||||
|
if (tid < residualLen)
|
||||||
|
output[shared.task.samplesOffs + pos + tid] = shared.data[tid + shared.task.residualOrder] - (sum >> shared.task.shift);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,6 +1,93 @@
|
|||||||
architecture {sm_10}
|
architecture {sm_10}
|
||||||
abiversion {1}
|
abiversion {1}
|
||||||
modname {cubin}
|
modname {cubin}
|
||||||
|
code {
|
||||||
|
name = cudaChooseBestResidual
|
||||||
|
lmem = 0
|
||||||
|
smem = 1564
|
||||||
|
reg = 7
|
||||||
|
bar = 1
|
||||||
|
const {
|
||||||
|
segname = const
|
||||||
|
segnum = 1
|
||||||
|
offset = 0
|
||||||
|
bytes = 16
|
||||||
|
mem {
|
||||||
|
0x0000007f 0x0000003f 0x0000001f 0x0000002f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bincode {
|
||||||
|
0xa0000009 0x04000780 0x3002cdfd 0x6420c7c8
|
||||||
|
0xa0010003 0x00000000 0x1000f003 0x00000280
|
||||||
|
0x1000cc01 0x0423c780 0x40014e05 0x00200780
|
||||||
|
0x30100205 0xc4100780 0x60004e01 0x00204780
|
||||||
|
0x20000001 0x04008780 0x30070005 0xc4100780
|
||||||
|
0x30060001 0xc4100780 0x20008200 0x2100ea00
|
||||||
|
0x20108001 0x00000003 0xd00e0001 0x80c00780
|
||||||
|
0x10010003 0x00000780 0x103f8001 0x07ffffff
|
||||||
|
0x00020405 0xc0000782 0x04010e01 0xe4200780
|
||||||
|
0x3002040d 0xc4100780 0x861ffe03 0x00000000
|
||||||
|
0x308005fd 0x644107c8 0xa001f003 0x00000000
|
||||||
|
0x1001f003 0x00000280 0x00000605 0xc0000780
|
||||||
|
0xd408380d 0x20000780 0xd4043809 0x20000780
|
||||||
|
0x1c00c001 0x0423c780 0x3800c1fd 0x6c2107c8
|
||||||
|
0x20008401 0x0000000b 0x10000401 0x0403c500
|
||||||
|
0x04000e01 0xe4200780 0xf0000001 0xe0000002
|
||||||
|
0x861ffe03 0x00000000 0x308105fd 0x644107c8
|
||||||
|
0xa0030003 0x00000000 0x10030003 0x00000280
|
||||||
|
0x00000605 0xc0000780 0xd4023809 0x20000780
|
||||||
|
0x0802c00d 0xc0200780 0x0402ce11 0xc0200780
|
||||||
|
0xdc04380d 0x20000780 0x1400ce01 0x0423c780
|
||||||
|
0xd0043811 0x20000784 0x1d00e004 0x2940e000
|
||||||
|
0x3001c005 0x6c20c784 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||||
|
0x308205fd 0x644107c8 0xa0074003 0x00000000
|
||||||
|
0x10074003 0x00000280 0x00000605 0xc0000780
|
||||||
|
0xd4013809 0x20000780 0x0802c00d 0xc0200780
|
||||||
|
0x0402ce11 0xc0200780 0xdc04380d 0x20000780
|
||||||
|
0x1400ce01 0x0423c780 0xd0043811 0x20000784
|
||||||
|
0x1d00e004 0x2940e000 0x3001c005 0x6c20c784
|
||||||
|
0xd0010001 0x04020780 0x2400ce01 0x04200780
|
||||||
|
0x04000e01 0xe4200780 0x0402ee0d 0xc0200780
|
||||||
|
0x0402ce09 0xc0200780 0xdc04380d 0x20000780
|
||||||
|
0x1400ce01 0x0423c780 0xd8043809 0x20000780
|
||||||
|
0x1c00c005 0x0423c780 0x2440ee01 0x04200780
|
||||||
|
0x3801c005 0x6c20c780 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0x0402de0d 0xc0200780 0x0402ce09 0xc0200780
|
||||||
|
0xdc04380d 0x20000780 0x1400ce01 0x0423c780
|
||||||
|
0xd8043809 0x20000780 0x1d00e004 0x2540fe00
|
||||||
|
0x3801c005 0x6c20c780 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0x0402d60d 0xc0200780 0x0402ce09 0xc0200780
|
||||||
|
0xdc04380d 0x20000780 0x1400ce01 0x0423c780
|
||||||
|
0xd8043809 0x20000780 0x1d00e004 0x2540f600
|
||||||
|
0x3801c005 0x6c20c780 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0x0402d20d 0xc0200780 0x0402ce09 0xc0200780
|
||||||
|
0xdc04380d 0x20000780 0x1400ce01 0x0423c780
|
||||||
|
0xd8043809 0x20000780 0x1d00e004 0x2540f200
|
||||||
|
0x3801c005 0x6c20c780 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0x0402d00d 0xc0200780 0x0402ce09 0xc0200780
|
||||||
|
0xdc04380d 0x20000780 0x1400ce01 0x0423c780
|
||||||
|
0xd8043809 0x20000780 0x1d00e004 0x2540f000
|
||||||
|
0x3801c005 0x6c20c780 0xd0010001 0x04020780
|
||||||
|
0x2400ce01 0x04200780 0x04000e01 0xe4200780
|
||||||
|
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||||
|
0x308305fd 0x644107c8 0x30000003 0x00000280
|
||||||
|
0xa0004e05 0x04200780 0x1000cc01 0x0423c780
|
||||||
|
0x40020209 0x00000780 0x30100409 0xc4100780
|
||||||
|
0x60020001 0x00008780 0x3007ce09 0xc4300780
|
||||||
|
0x3006ce11 0xc4300780 0x30070015 0xc4100780
|
||||||
|
0x30060019 0xc4100780 0x30070201 0xc4100780
|
||||||
|
0x30060205 0xc4100780 0x20048408 0x20068a10
|
||||||
|
0x20018000 0x20028608 0x2104ea10 0x2100e804
|
||||||
|
0x20000401 0x04010780 0xd00e0001 0x80c00780
|
||||||
|
0x20000605 0x04004780 0xd00e0201 0xa0c00781
|
||||||
|
}
|
||||||
|
}
|
||||||
code {
|
code {
|
||||||
name = cudaComputeAutocor
|
name = cudaComputeAutocor
|
||||||
lmem = 0
|
lmem = 0
|
||||||
@@ -96,105 +183,121 @@ code {
|
|||||||
code {
|
code {
|
||||||
name = cudaEstimateResidual
|
name = cudaEstimateResidual
|
||||||
lmem = 0
|
lmem = 0
|
||||||
smem = 3624
|
smem = 3752
|
||||||
reg = 10
|
reg = 10
|
||||||
bar = 1
|
bar = 1
|
||||||
const {
|
const {
|
||||||
segname = const
|
segname = const
|
||||||
segnum = 1
|
segnum = 1
|
||||||
offset = 0
|
offset = 0
|
||||||
bytes = 12
|
bytes = 20
|
||||||
mem {
|
mem {
|
||||||
0x000003ff 0x0000000f 0x0000000e
|
0x000003ff 0x0000000f 0x0000001f 0x00000001
|
||||||
|
0x0000000e
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bincode {
|
bincode {
|
||||||
0xd0800205 0x00400780 0xa0000209 0x04000780
|
0xd0800205 0x00400780 0xa0000209 0x04000780
|
||||||
0x30070405 0xc4100780 0x3006040d 0xc4100780
|
0x30070405 0xc4100780 0x3006040d 0xc4100780
|
||||||
0xa0000001 0x04000780 0x20000205 0x0400c780
|
0xa0000011 0x04000780 0x20000201 0x0400c780
|
||||||
0x30020015 0xc4100780 0x2000020d 0x04014780
|
0x30020805 0xc4100780 0x2000000d 0x04004780
|
||||||
0x308101fd 0x644107c8 0x00000205 0xc0000780
|
0x00000005 0xc0000780 0x308109fd 0x644107c8
|
||||||
0x00000609 0xc0000780 0xa0015003 0x00000000
|
0x00000609 0xc0000780 0xa0018003 0x00000000
|
||||||
0x10015003 0x00000280 0x10004409 0x0023c780
|
0xa0004401 0x04200780 0x10018003 0x00000280
|
||||||
0x60024e05 0x00208780 0x3007020d 0xc4100780
|
0x40014e0d 0x00200780 0x3010060d 0xc4100780
|
||||||
0x30060205 0xc4100780 0x20018604 0x2101ec04
|
0x60004e01 0x0020c780 0x20000001 0x04008780
|
||||||
0x20000a05 0x04004780 0xd00e0205 0x80c00780
|
0x3007000d 0xc4100780 0x30060001 0xc4100780
|
||||||
0x08041401 0xe4204780 0xf0000001 0xe0000002
|
0x20008600 0x2100ec00 0x20000201 0x04000780
|
||||||
0x861ffe03 0x00000000 0xa0004c11 0x04200780
|
0xd00e0001 0x80c00780 0x08045401 0xe4200780
|
||||||
0x1000d205 0x0423c780 0x40080619 0x00000780
|
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||||
0xa000420d 0x04200780 0x30100c19 0xc4100780
|
0xa0004c0d 0x04200780 0x1000d201 0x0423c780
|
||||||
0x60080411 0x00018780 0x40060a05 0x00000780
|
0x40060215 0x00000780 0x30100a15 0xc4100780
|
||||||
0x30100219 0xc4100780 0x1000d205 0x0423c780
|
0x6006000d 0x00014780 0x40054201 0x00200780
|
||||||
0x60060819 0x00018780 0x2101ee20 0x2144f004
|
0x30100015 0xc4100780 0x1000d201 0x0423c780
|
||||||
0x20000c1d 0x04000780 0x30080221 0xac000780
|
0x60044215 0x00214780 0x2100ee1c 0x2143f000
|
||||||
0x30080ffd 0x6c0187c8 0xd010580d 0x20000780
|
0x20000a19 0x04010780 0x3007001d 0xac000780
|
||||||
0x2c00c011 0x04210500 0x20000e11 0x04010500
|
0x30060ffd 0x6c00c7c8 0xa0030003 0x00000000
|
||||||
0x30020811 0xc4100500 0x2000ca11 0x04210500
|
0x1002f003 0x00000280 0xd011580d 0x20000780
|
||||||
0xd00e0811 0x80c00500 0x1000f811 0x0403c280
|
0x2d03e020 0x20088c20 0x30021021 0xc4100780
|
||||||
0x00020e0d 0xc0000780 0x0c001401 0xe4210780
|
0x2000ca21 0x04220780 0xd00e1021 0x80c00780
|
||||||
0xd410500d 0x20000780 0x3c00c005 0x04204780
|
0x10030003 0x00000780 0x1000f821 0x0403c780
|
||||||
0x3c7cc011 0x6c208780 0x3001d205 0xac200780
|
0x00020c0d 0xc0000782 0x0c001401 0xe4220780
|
||||||
0x307c0205 0x8c000780 0xd0040211 0x04020780
|
0x30820dfd 0x6c4107c8 0xa0043003 0x00000000
|
||||||
0xd4005005 0x20000780 0x861ffe03 0x00000000
|
0x10043003 0x00000280 0x2000d221 0x04218780
|
||||||
0x3000cffd 0x6420c7c8 0xa0045003 0x00000000
|
0x0002100d 0xc0000780 0x30080ffd 0x6c00c7c8
|
||||||
0x00020e0d 0xc0000780 0x0c021401 0xe43f0780
|
0xa0042003 0x00000000 0x10041003 0x00000280
|
||||||
0x10044003 0x00000280 0x10004409 0x0023c780
|
0xd0115811 0x20000780 0x2000c00d 0x0420c784
|
||||||
0x60024e05 0x00208780 0x30070221 0xc4100780
|
0x2106f21c 0x2007860c 0x3002060d 0xc4100780
|
||||||
0x30060205 0xc4100780 0x20019004 0x2101ec04
|
0x2000ca0d 0x0420c780 0xd00e060d 0x80c00780
|
||||||
0x20000a05 0x04004780 0x20008205 0x00000007
|
0x10042003 0x00000780 0x1000f80d 0x0403c780
|
||||||
0xd00e0205 0x80c00780 0x10045003 0x00000780
|
0x0c001401 0xe420c782 0xd411500d 0x20000782
|
||||||
0x1000f805 0x0403c780 0x307c09fd 0x640087ca
|
0x3c00c00d 0x04200780 0xd4005005 0x20000780
|
||||||
0x08043401 0xe4204780 0xa0072003 0x00000000
|
0x861ffe03 0x00000000 0x3004cffd 0x6420c7c8
|
||||||
0x10000005 0x0403c780 0x10072003 0x00000280
|
0xa0059003 0x00000000 0x00020c0d 0xc0000780
|
||||||
0xd4100009 0x20000780 0x387cc1fd 0x6c20c7c8
|
0x0c025401 0xe43f0780 0x10058003 0x00000280
|
||||||
0xa005e003 0x00000000 0x1000f815 0x0403c780
|
0xa0004401 0x04200780 0x40014e1d 0x00200780
|
||||||
0x1000f825 0x0403c780 0x1005e003 0x00000280
|
0x30100e1d 0xc4100780 0x60004e01 0x0021c780
|
||||||
0xa005c003 0x00000000 0xd4000009 0x20000780
|
0x20000001 0x04008780 0x3007001d 0xc4100780
|
||||||
0x20000221 0x04014780 0x0002100d 0xc0000780
|
0x30060001 0xc4100780 0x20008e00 0x2100ec00
|
||||||
0xd8108011 0x20000780 0x1000c021 0x0423c784
|
0x20000201 0x04000780 0x20008001 0x00000007
|
||||||
0x6c08d425 0x80224780 0xd410000d 0x20000780
|
0xd00e0001 0x80c00780 0x10059003 0x00000780
|
||||||
0x20018a15 0x00000003 0x3c05c1fd 0x6c2147c8
|
0x1000f801 0x0403c780 0x08047401 0xe4200782
|
||||||
0xd8000809 0x20000780 0x10052003 0x00000280
|
0xd4112809 0x20000780 0x3883c005 0x6c608780
|
||||||
0xd4100009 0x20000782 0x1800c015 0x0423c780
|
0xa0004401 0x04200780 0xd001001d 0x04000780
|
||||||
0x20000215 0x04014782 0x00020a09 0xc0000780
|
0x30000ffd 0x640187c8 0xa0093003 0x00000000
|
||||||
0xd410100d 0x20000780 0x1c00c015 0x0423c780
|
0x10091003 0x00000280 0x3003d201 0xac200780
|
||||||
0x30051215 0xec000780 0x2840d415 0x04214780
|
0x307c000d 0x8c000780 0xd4110009 0x20000780
|
||||||
0x301f0a21 0xec100780 0x30010a15 0xc4100780
|
0x387cc1fd 0x6c20c7c8 0xa007c003 0x00000000
|
||||||
0xd0051021 0x04008780 0x00020e0d 0xc0000780
|
0x1000f801 0x0403c780 0x1000f825 0x0403c780
|
||||||
0xdc085009 0x20000780 0x30010825 0x6c010780
|
0x1007a003 0x00000280 0x30050e05 0xc4100780
|
||||||
0x1800c015 0x0423c780 0xa0001225 0x2c014780
|
0x20000221 0x04010780 0x200a9005 0x00000003
|
||||||
0x60081215 0x80014780 0x20000205 0x0400c780
|
0x0002020d 0xc0000780 0xa0077003 0x00000000
|
||||||
0x0c021401 0xe4214780 0x20400215 0x04000780
|
0xd4000009 0x20000780 0xd8118011 0x20000780
|
||||||
0x300509fd 0x640107c8 0x1004a003 0x00000280
|
0x1000c005 0x0423c784 0x6e01c225 0x80224780
|
||||||
0x00020e0d 0xc0000782 0xdc085011 0x20000780
|
0x20018001 0x00000003 0xd4110011 0x20000780
|
||||||
0x1000e005 0x0423c784 0x2000c005 0x04204784
|
0x3000c1fd 0x6c2147cc 0xd8000809 0x20000780
|
||||||
0x0c021401 0xe4204780 0x1000d005 0x0423c784
|
0x1006f003 0x00000280 0xd4110009 0x20000782
|
||||||
0x2000c005 0x04204784 0x0c021401 0xe4204780
|
0x1800c001 0x0423c780 0x1007c003 0x00000780
|
||||||
0x1000c805 0x0423c784 0x2000c005 0x04204784
|
0x30050e05 0xc4100780 0x20000221 0x04010780
|
||||||
0x0c021401 0xe4204780 0x1000c405 0x0423c784
|
0x20001001 0x04000782 0x00020009 0xc0000780
|
||||||
0x2000c005 0x04204784 0x0c021401 0xe4204780
|
0xd411100d 0x20000780 0x1c00c001 0x0423c780
|
||||||
0x20018005 0x00000003 0x1000c20d 0x0423c784
|
0x30001201 0xec000780 0x2840d401 0x04200780
|
||||||
0x2000c00d 0x0420c784 0x40031015 0x00000780
|
0x301f0005 0xec100780 0x30010001 0xc4100780
|
||||||
0x0c021401 0xe420c780 0x6002120d 0x00014780
|
0xd0000205 0x04008780 0x30080601 0x6c010780
|
||||||
0x30820021 0x64410780 0x00020c09 0xc0000780
|
0x00020c0d 0xc0000780 0xdc095009 0x20000780
|
||||||
0xd8085009 0x20000780 0x30100615 0xc4100780
|
0xa0000021 0x2c014780 0x1800c001 0x0423c780
|
||||||
0xa0001019 0x2c014780 0x3001080d 0xec100780
|
0x60011005 0x80000780 0x20018e1d 0x00000003
|
||||||
0x60021011 0x00014780 0x407f8c15 0x0007ffff
|
0xa0004401 0x04200780 0x30000ffd 0x640147c8
|
||||||
0x2943e004 0x2005880c 0x30000205 0xec000780
|
0x0c025401 0xe4204780 0x10063003 0x00000280
|
||||||
0x20000205 0x0400c780 0x0c021401 0xe4204780
|
0x10093003 0x00000780 0x3003d201 0xac200780
|
||||||
0x1000d005 0x0423c784 0x3001c005 0xac200784
|
0x307c000d 0x8c000780 0x00020c0d 0xc0000782
|
||||||
0x0c021401 0xe4204780 0x1000c805 0x0423c784
|
0xdc095005 0x20000780 0x1400e001 0x0423c780
|
||||||
0x3001c005 0xac200784 0x0c021401 0xe4204780
|
0x2400c001 0x04200780 0x0c025401 0xe4200780
|
||||||
0x1000c405 0x0423c784 0x3001c005 0xac200784
|
0x1500f000 0x2500e000 0x0c025401 0xe4200780
|
||||||
0x0c021401 0xe4204780 0x1000c205 0x0423c784
|
0x1500e800 0x2500e000 0x0c025401 0xe4200780
|
||||||
0x3001c005 0xac200784 0x307c01fd 0x640147c8
|
0x1500e400 0x2500e000 0x0c025401 0xe4200780
|
||||||
0x0c021401 0xe4204780 0x30000003 0x00000280
|
0x1400c205 0x0423c780 0x20018801 0x00000003
|
||||||
0xd4100005 0x20000780 0x347cc1fd 0x6c2087c8
|
0x2400c005 0x04204780 0x0c025401 0xe4204780
|
||||||
0x30000003 0x00000280 0x10004401 0x0023c780
|
0x40010c05 0x00000780 0x60000e05 0x00004780
|
||||||
0x60004e01 0x00208780 0x40014805 0x00200780
|
0x30100205 0xc4100780 0x60000c01 0x00004780
|
||||||
|
0x30840805 0x64410780 0x00020a09 0xc0000780
|
||||||
|
0xd8095009 0x20000780 0xa0000215 0x2c014780
|
||||||
|
0x30010605 0xec100780 0x407f8a0d 0x0007ffff
|
||||||
|
0x2941e004 0x2003800c 0x30040201 0xec000780
|
||||||
|
0x20000001 0x0400c780 0x0c025401 0xe4200780
|
||||||
|
0x1400d001 0x0423c780 0x3400c001 0xac200780
|
||||||
|
0x0c025401 0xe4200780 0x1400c801 0x0423c780
|
||||||
|
0x3400c001 0xac200780 0x0c025401 0xe4200780
|
||||||
|
0x1400c401 0x0423c780 0x3400c001 0xac200780
|
||||||
|
0x0c025401 0xe4200780 0x1400c201 0x0423c780
|
||||||
|
0x3400c001 0xac200780 0x307c09fd 0x640147c8
|
||||||
|
0x0c025401 0xe4200780 0x30000003 0x00000280
|
||||||
|
0xa0004401 0x04200780 0x40014e05 0x00200780
|
||||||
|
0x30100205 0xc4100780 0x60004e01 0x00204780
|
||||||
|
0x20000001 0x04008780 0x40014805 0x00200780
|
||||||
0x30100205 0xc4100780 0x60004801 0x00204780
|
0x30100205 0xc4100780 0x60004801 0x00204780
|
||||||
0xa0004c11 0x04200780 0x20000001 0x04010780
|
0xa0004c0d 0x04200780 0x20000001 0x0400c780
|
||||||
0x00020e0d 0xc0000780 0xdc085005 0x20000780
|
0x00020c0d 0xc0000780 0xdc095005 0x20000780
|
||||||
0x30020005 0xc4100780 0x1500e000 0x2101e804
|
0x30020005 0xc4100780 0x1500e000 0x2101e804
|
||||||
0xd00e0201 0xa0c00781
|
0xd00e0201 0xa0c00781
|
||||||
}
|
}
|
||||||
@@ -202,11 +305,62 @@ code {
|
|||||||
code {
|
code {
|
||||||
name = cudaEncodeResidual
|
name = cudaEncodeResidual
|
||||||
lmem = 0
|
lmem = 0
|
||||||
smem = 36
|
smem = 1372
|
||||||
reg = 0
|
reg = 6
|
||||||
bar = 0
|
bar = 1
|
||||||
|
const {
|
||||||
|
segname = const
|
||||||
|
segnum = 1
|
||||||
|
offset = 0
|
||||||
|
bytes = 8
|
||||||
|
mem {
|
||||||
|
0x000000bf 0x0000001f
|
||||||
|
}
|
||||||
|
}
|
||||||
bincode {
|
bincode {
|
||||||
0xf0000001 0xe0000001
|
0xa0000005 0x04000780 0x308003fd 0x644107c8
|
||||||
|
0xa000d003 0x00000000 0x1000d003 0x00000280
|
||||||
|
0xa0004e01 0x04200780 0x30070009 0xc4100780
|
||||||
|
0x30060001 0xc4100780 0x20000401 0x04000780
|
||||||
|
0x30020209 0xc4100780 0x2100ec00 0x20008400
|
||||||
|
0xd00e0001 0x80c00780 0x00020205 0xc0000780
|
||||||
|
0x04024e01 0xe4200780 0xf0000001 0xe0000002
|
||||||
|
0x861ffe03 0x00000000 0xa0004201 0x04200780
|
||||||
|
0x40014c09 0x00200780 0x30100409 0xc4100780
|
||||||
|
0xd0093805 0x20000780 0x60004c09 0x00208780
|
||||||
|
0x2500e00c 0x2542ee10 0x3004060d 0xac000780
|
||||||
|
0x300107fd 0x6c00c7c8 0xa0020003 0x00000000
|
||||||
|
0x1001f003 0x00000280 0xd0094005 0x20000780
|
||||||
|
0x2502e010 0x20048210 0x30020811 0xc4100780
|
||||||
|
0x2000ca11 0x04210780 0xd00e0811 0x80c00780
|
||||||
|
0x10020003 0x00000780 0x1000f811 0x0403c780
|
||||||
|
0x00020205 0xc0000782 0x308103fd 0x6c4107c8
|
||||||
|
0x04000e01 0xe4210780 0xa0033003 0x00000000
|
||||||
|
0x10033003 0x00000280 0x20000011 0x04004780
|
||||||
|
0x300309fd 0x6c0187c8 0x00020805 0xc0000780
|
||||||
|
0xa0032003 0x00000000 0x10031003 0x00000280
|
||||||
|
0xd0094009 0x20000780 0x2001800c 0x2902e010
|
||||||
|
0x2000060d 0x04010780 0x3002060d 0xc4100780
|
||||||
|
0x2000ca0d 0x0420c780 0xd00e060d 0x80c00780
|
||||||
|
0x10032003 0x00000780 0x1000f80d 0x0403c780
|
||||||
|
0x04000e01 0xe420c782 0xd0093805 0x20000782
|
||||||
|
0x2542ee0c 0x3503e00c 0x30030001 0xac000780
|
||||||
|
0x307c0011 0x8c000780 0x861ffe03 0x00000000
|
||||||
|
0xd0093805 0x20000780 0x347cc1fd 0x6c20c7c8
|
||||||
|
0x1000f80d 0x0403c780 0x1400c001 0x0423c780
|
||||||
|
0x1004b003 0x00000280 0x101c8001 0x00000003
|
||||||
|
0x00000005 0xc0000780 0x1000f815 0x0403c780
|
||||||
|
0x20000a01 0x04004780 0xd409800d 0x20000780
|
||||||
|
0x00020009 0xc0000780 0xd0093811 0x20000780
|
||||||
|
0x20018a15 0x00000003 0x1c00c001 0x0423c780
|
||||||
|
0x3005c1fd 0x6c2147cc 0x6800ce0d 0x8020c780
|
||||||
|
0xd4000805 0x20000780 0x1000c001 0x0423c784
|
||||||
|
0x10040003 0x00000280 0x300109fd 0x6c00c7c8
|
||||||
|
0x30000003 0x00000280 0xd0094005 0x20000780
|
||||||
|
0x2502e008 0x20008210 0x1500e200 0x20028204
|
||||||
|
0x00020805 0xc0000780 0x30000609 0xec000780
|
||||||
|
0x30020201 0xc4100780 0x2542ee04 0x2100e800
|
||||||
|
0xd00e0005 0xa0c00781
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
code {
|
code {
|
||||||
@@ -422,15 +576,16 @@ code {
|
|||||||
name = cudaSumResidual
|
name = cudaSumResidual
|
||||||
lmem = 0
|
lmem = 0
|
||||||
smem = 1248
|
smem = 1248
|
||||||
reg = 4
|
reg = 5
|
||||||
bar = 1
|
bar = 1
|
||||||
const {
|
const {
|
||||||
segname = const
|
segname = const
|
||||||
segnum = 1
|
segnum = 1
|
||||||
offset = 0
|
offset = 0
|
||||||
bytes = 8
|
bytes = 20
|
||||||
mem {
|
mem {
|
||||||
0x0000002f 0x0000001f
|
0x0000002f 0x0000001f 0x00000008 0x00000020
|
||||||
|
0x00000001
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bincode {
|
bincode {
|
||||||
@@ -460,9 +615,26 @@ code {
|
|||||||
0x2400d401 0x04200780 0x04001001 0xe4200780
|
0x2400d401 0x04200780 0x04001001 0xe4200780
|
||||||
0x2400d201 0x04200780 0x307c03fd 0x6c0147c8
|
0x2400d201 0x04200780 0x307c03fd 0x6c0147c8
|
||||||
0x04001001 0xe4200780 0x30000003 0x00000280
|
0x04001001 0xe4200780 0x30000003 0x00000280
|
||||||
0xa0004e01 0x04200780 0x30070005 0xc4100780
|
0xd0086805 0x20000780 0x3482c1fd 0x6c6147c8
|
||||||
0x30060001 0xc4100780 0x20008200 0x2100e804
|
0x10040003 0x00000280 0xd0084005 0x20000780
|
||||||
0x1000d001 0x0423c780 0x20108205 0x00000003
|
0x1500ec00 0x1500e004 0x40030009 0x00000780
|
||||||
0xd00e0201 0xa0c00781
|
0x60020209 0x00008780 0x30100409 0xc4100780
|
||||||
|
0x60020001 0x00008780 0x2000d001 0x04200780
|
||||||
|
0x20068001 0x00000003 0x10058003 0x00000780
|
||||||
|
0xd0086805 0x20000780 0x3483c1fd 0x6c6147c8
|
||||||
|
0x10052003 0x00000280 0xd0084005 0x20000780
|
||||||
|
0x1500e604 0x1500e000 0x2400cc05 0x04204780
|
||||||
|
0x3002ce0d 0xc4300780 0x40030009 0x00000780
|
||||||
|
0x301f0611 0xec100780 0x60020209 0x00008780
|
||||||
|
0xd0840811 0x04400780 0x30100409 0xc4100780
|
||||||
|
0x2000080d 0x0400c780 0x60020001 0x00008780
|
||||||
|
0x30010605 0xec100780 0x20018000 0x2100f000
|
||||||
|
0x200f8001 0x00000003 0x10058003 0x00000780
|
||||||
|
0xd0087005 0x20000780 0x1500e000 0x1500e204
|
||||||
|
0x40030009 0x00000780 0x60020209 0x00008780
|
||||||
|
0x30100409 0xc4100780 0x60020001 0x00008780
|
||||||
|
0xa0004e05 0x04200780 0x30070209 0xc4100780
|
||||||
|
0x30060205 0xc4100780 0x20018404 0x2101e804
|
||||||
|
0x20108205 0x00000003 0xd00e0201 0xa0c00781
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user