optimizations

This commit is contained in:
chudov
2009-10-08 13:32:51 +00:00
parent e21ad1591d
commit 9a1a1956af
3 changed files with 899 additions and 430 deletions

View File

@@ -127,7 +127,7 @@ namespace CUETools.Codecs.FlaCuda
windowBuffer = new float[FlaCudaWriter.MAX_BLOCKSIZE * lpc.MAX_LPC_WINDOWS]; windowBuffer = new float[FlaCudaWriter.MAX_BLOCKSIZE * lpc.MAX_LPC_WINDOWS];
md5_buffer = new byte[FlaCudaWriter.MAX_BLOCKSIZE * channels * bits_per_sample / 8]; md5_buffer = new byte[FlaCudaWriter.MAX_BLOCKSIZE * channels * bits_per_sample / 8];
eparams.flake_set_defaults(_compressionLevel); eparams.flake_set_defaults(_compressionLevel, encode_on_cpu);
eparams.padding_size = 8192; eparams.padding_size = 8192;
crc8 = new Crc8(); crc8 = new Crc8();
@@ -165,7 +165,7 @@ namespace CUETools.Codecs.FlaCuda
if (value < 0 || value > 11) if (value < 0 || value > 11)
throw new Exception("unsupported compression level"); throw new Exception("unsupported compression level");
_compressionLevel = value; _compressionLevel = value;
eparams.flake_set_defaults(_compressionLevel); eparams.flake_set_defaults(_compressionLevel, encode_on_cpu);
} }
} }
@@ -178,6 +178,7 @@ namespace CUETools.Codecs.FlaCuda
set set
{ {
encode_on_cpu = !value; encode_on_cpu = !value;
eparams.flake_set_defaults(_compressionLevel, encode_on_cpu);
} }
} }
@@ -841,7 +842,7 @@ namespace CUETools.Codecs.FlaCuda
if ((eparams.window_function & flag) == 0 || _windowcount == lpc.MAX_LPC_WINDOWS) if ((eparams.window_function & flag) == 0 || _windowcount == lpc.MAX_LPC_WINDOWS)
return; return;
func(window + _windowcount * FlaCudaWriter.MAX_BLOCKSIZE, _windowsize); func(window + _windowcount * _windowsize, _windowsize);
//int sz = _windowsize; //int sz = _windowsize;
//float* pos = window + _windowcount * FlaCudaWriter.MAX_BLOCKSIZE * 2; //float* pos = window + _windowcount * FlaCudaWriter.MAX_BLOCKSIZE * 2;
//do //do
@@ -858,7 +859,8 @@ namespace CUETools.Codecs.FlaCuda
unsafe void initializeSubframeTasks(int blocksize, int channelsCount, int nFrames, FlaCudaTask task) unsafe void initializeSubframeTasks(int blocksize, int channelsCount, int nFrames, FlaCudaTask task)
{ {
task.nResidualTasks = 0; task.nResidualTasks = 0;
task.nResidualTasksPerChannel = (_windowcount * eparams.max_prediction_order + 1 + (eparams.do_constant ? 1 : 0) + eparams.max_fixed_order - eparams.min_fixed_order + 7) & ~7; task.nTasksPerWindow = Math.Min(eparams.max_prediction_order, eparams.orders_per_window);
task.nResidualTasksPerChannel = (_windowcount * task.nTasksPerWindow + 1 + (eparams.do_constant ? 1 : 0) + eparams.max_fixed_order - eparams.min_fixed_order + 7) & ~7;
task.nAutocorTasksPerChannel = _windowcount; task.nAutocorTasksPerChannel = _windowcount;
for (int iFrame = 0; iFrame < nFrames; iFrame++) for (int iFrame = 0; iFrame < nFrames; iFrame++)
{ {
@@ -867,17 +869,16 @@ namespace CUETools.Codecs.FlaCuda
for (int iWindow = 0; iWindow < _windowcount; iWindow++) for (int iWindow = 0; iWindow < _windowcount; iWindow++)
{ {
// LPC tasks // LPC tasks
for (int order = 1; order <= eparams.max_prediction_order; order++) for (int order = 0; order < task.nTasksPerWindow; order++)
{ {
task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.LPC; task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.LPC;
task.ResidualTasks[task.nResidualTasks].channel = ch; task.ResidualTasks[task.nResidualTasks].channel = ch;
task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0); task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits; task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits;
task.ResidualTasks[task.nResidualTasks].blocksize = blocksize; task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
task.ResidualTasks[task.nResidualTasks].residualOrder = order; task.ResidualTasks[task.nResidualTasks].residualOrder = order + 1;
task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize;
task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs; task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
task.ResidualTasks[task.nResidualTasks].windowOffs = iWindow * FlaCudaWriter.MAX_BLOCKSIZE;
task.nResidualTasks++; task.nResidualTasks++;
} }
} }
@@ -1160,9 +1161,10 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetParameter(task.cudaComputeLPC, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel); cuda.SetParameter(task.cudaComputeLPC, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameter(task.cudaComputeLPC, 2 * sizeof(uint), (uint)task.cudaAutocorOutput.Pointer); cuda.SetParameter(task.cudaComputeLPC, 2 * sizeof(uint), (uint)task.cudaAutocorOutput.Pointer);
cuda.SetParameter(task.cudaComputeLPC, 3 * sizeof(uint), (uint)eparams.max_prediction_order); cuda.SetParameter(task.cudaComputeLPC, 3 * sizeof(uint), (uint)eparams.max_prediction_order);
cuda.SetParameter(task.cudaComputeLPC, 4 * sizeof(uint), (uint)autocorPartCount); cuda.SetParameter(task.cudaComputeLPC, 4 * sizeof(uint), (uint)task.nTasksPerWindow);
cuda.SetParameterSize(task.cudaComputeLPC, 5U * sizeof(uint)); cuda.SetParameter(task.cudaComputeLPC, 5 * sizeof(uint), (uint)autocorPartCount);
cuda.SetFunctionBlockShape(task.cudaComputeLPC, (autocorPartCount + 31) & ~31, 1, 1); cuda.SetParameterSize(task.cudaComputeLPC, 6U * sizeof(uint));
cuda.SetFunctionBlockShape(task.cudaComputeLPC, 32, 8, 1);
cuda.SetParameter(task.cudaComputeLPCLattice, 0, (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaComputeLPCLattice, 0, (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaComputeLPCLattice, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel); cuda.SetParameter(task.cudaComputeLPCLattice, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
@@ -1766,6 +1768,8 @@ namespace CUETools.Codecs.FlaCuda
// valid values are 1 to 32 // valid values are 1 to 32
public int max_prediction_order; public int max_prediction_order;
public int orders_per_window;
// minimum fixed prediction order // minimum fixed prediction order
// set by user prior to calling flake_encode_init // set by user prior to calling flake_encode_init
// if set to less than 0, it is chosen based on compression. // if set to less than 0, it is chosen based on compression.
@@ -1813,7 +1817,7 @@ namespace CUETools.Codecs.FlaCuda
public bool do_verify; public bool do_verify;
public bool do_seektable; public bool do_seektable;
public int flake_set_defaults(int lvl) public int flake_set_defaults(int lvl, bool encode_on_cpu)
{ {
compression = lvl; compression = lvl;
@@ -1841,68 +1845,91 @@ namespace CUETools.Codecs.FlaCuda
do_seektable = true; do_seektable = true;
do_wasted = true; do_wasted = true;
do_constant = true; do_constant = true;
orders_per_window = 32;
// differences from level 7 // differences from level 7
switch (lvl) switch (lvl)
{ {
case 0: case 0:
do_constant = false;
do_wasted = false; do_wasted = false;
do_midside = false; do_midside = false;
orders_per_window = 1;
max_partition_order = 4; max_partition_order = 4;
max_prediction_order = 4; max_prediction_order = 7;
min_fixed_order = 3; min_fixed_order = 2;
max_fixed_order = 2; max_fixed_order = 2;
break; break;
case 1: case 1:
window_function = WindowFunction.Bartlett;
do_wasted = false; do_wasted = false;
do_midside = false; do_midside = false;
orders_per_window = 1;
max_prediction_order = 12;
max_partition_order = 4; max_partition_order = 4;
max_prediction_order = 5;
break; break;
case 2: case 2:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_partition_order = 4;
min_fixed_order = 2; min_fixed_order = 2;
max_fixed_order = 2; max_fixed_order = 2;
max_prediction_order = 6; orders_per_window = 1;
max_prediction_order = 7;
max_partition_order = 4;
break; break;
case 3: case 3:
window_function = WindowFunction.Bartlett; window_function = WindowFunction.Bartlett;
max_partition_order = 4;
min_fixed_order = 2; min_fixed_order = 2;
max_fixed_order = 1; max_fixed_order = 2;
max_prediction_order = 7; orders_per_window = 3;
max_prediction_order = 8;
max_partition_order = 4;
break; break;
case 4: case 4:
min_fixed_order = 2;
max_fixed_order = 2;
orders_per_window = 3;
max_partition_order = 4; max_partition_order = 4;
max_prediction_order = 8; max_prediction_order = 8;
break; break;
case 5: case 5:
max_prediction_order = 9; min_fixed_order = 2;
max_fixed_order = 2;
orders_per_window = 3;
break; break;
case 6: case 6:
min_fixed_order = 2;
max_fixed_order = 2; max_fixed_order = 2;
max_prediction_order = 10; orders_per_window = 7;
break; break;
case 7: case 7:
min_fixed_order = 2; min_fixed_order = 2;
max_fixed_order = 2; max_fixed_order = 2;
max_prediction_order = 11; orders_per_window = 11;
break; break;
case 8: case 8:
break; break;
case 9: case 9:
max_prediction_order = 16; min_fixed_order = 2;
max_fixed_order = 2;
orders_per_window = 3;
max_prediction_order = 32;
break; break;
case 10: case 10:
max_prediction_order = 24; min_fixed_order = 2;
max_fixed_order = 2;
orders_per_window = 7;
max_prediction_order = 32;
break; break;
case 11: case 11:
min_fixed_order = 2;
max_fixed_order = 2;
orders_per_window = 11;
max_prediction_order = 32; max_prediction_order = 32;
break; break;
} }
if (!encode_on_cpu)
max_partition_order = 8;
return 0; return 0;
} }
} }
@@ -1923,8 +1950,7 @@ namespace CUETools.Codecs.FlaCuda
public int wbits; public int wbits;
public int abits; public int abits;
public int porder; public int porder;
public int windowOffs; public fixed int reserved[2];
public fixed int reserved[1];
public fixed int coefs[32]; public fixed int coefs[32];
}; };
@@ -1977,6 +2003,7 @@ namespace CUETools.Codecs.FlaCuda
public int samplesBufferLen; public int samplesBufferLen;
public int nResidualTasks = 0; public int nResidualTasks = 0;
public int nResidualTasksPerChannel = 0; public int nResidualTasksPerChannel = 0;
public int nTasksPerWindow = 0;
public int nAutocorTasksPerChannel = 0; public int nAutocorTasksPerChannel = 0;
public int max_porder = 0; public int max_porder = 0;

View File

@@ -44,8 +44,7 @@ typedef struct
int wbits; int wbits;
int abits; int abits;
int porder; int porder;
int windowOffs; int reserved[2];
int reserved[1];
} FlaCudaSubframeData; } FlaCudaSubframeData;
typedef struct typedef struct
@@ -161,22 +160,31 @@ extern "C" __global__ void cudaComputeAutocor(
volatile float product[256]; volatile float product[256];
FlaCudaSubframeData task; FlaCudaSubframeData task;
volatile float result[33]; volatile float result[33];
volatile int dataPos;
volatile int dataLen;
volatile int windowOffs;
volatile int samplesOffs;
//volatile int resultOffs;
} shared; } shared;
const int tid = threadIdx.x + (threadIdx.y * 32); const int tid = threadIdx.x + (threadIdx.y * 32);
// fetch task data // fetch task data
if (tid < sizeof(shared.task) / sizeof(int)) if (tid < sizeof(shared.task) / sizeof(int))
((int*)&shared.task)[tid] = ((int*)(tasks + __mul24(taskCount, blockIdx.y >> windowcount) + __mul24(max_order, blockIdx.y & ((1 << windowcount)-1))))[tid]; ((int*)&shared.task)[tid] = ((int*)(tasks + __mul24(taskCount, blockIdx.y >> windowcount)))[tid];
if (tid == 0)
{
shared.dataPos = __mul24(blockIdx.x, 15) * 32;
shared.windowOffs = __mul24(blockIdx.y & ((1 << windowcount)-1), shared.task.blocksize) + shared.dataPos;
shared.samplesOffs = shared.task.samplesOffs + shared.dataPos;
shared.dataLen = min(shared.task.blocksize - shared.dataPos, 15 * 32 + max_order);
}
//if (tid == 32)
//shared.resultOffs = __mul24(blockIdx.x + __mul24(blockIdx.y, gridDim.x), max_order + 1);
__syncthreads(); __syncthreads();
// fetch samples // fetch samples
{ shared.data[tid] = tid < shared.dataLen ? samples[shared.samplesOffs + tid] * window[shared.windowOffs + tid]: 0.0f;
const int pos = __mul24(blockIdx.x, 15) * 32; int tid2 = tid + 256;
const int dataLen = min(shared.task.blocksize - pos, 15 * 32 + max_order); shared.data[tid2] = tid2 < shared.dataLen ? samples[shared.samplesOffs + tid2] * window[shared.windowOffs + tid2]: 0.0f;
const int pos2 = pos + tid;
shared.data[tid] = tid < dataLen ? samples[shared.task.samplesOffs + pos2] * window[shared.task.windowOffs + pos2]: 0.0f;
shared.data[tid + 256] = tid + 256 < dataLen ? samples[shared.task.samplesOffs + pos2 + 256] * window[shared.task.windowOffs + pos2 + 256]: 0.0f;
}
__syncthreads(); __syncthreads();
const int ptr = __mul24(threadIdx.x, 15); const int ptr = __mul24(threadIdx.x, 15);
@@ -208,7 +216,7 @@ extern "C" __global__ void cudaComputeAutocor(
} }
__syncthreads(); __syncthreads();
if (tid <= max_order) if (tid <= max_order)
output[(blockIdx.x + blockIdx.y * gridDim.x) * (max_order + 1) + tid] = shared.result[tid]; output[__mul24(blockIdx.x + __mul24(blockIdx.y, gridDim.x), max_order + 1) + tid] = shared.result[tid];
} }
extern "C" __global__ void cudaComputeLPC( extern "C" __global__ void cudaComputeLPC(
@@ -216,100 +224,292 @@ extern "C" __global__ void cudaComputeLPC(
int taskCount, // tasks per block int taskCount, // tasks per block
float*autoc, float*autoc,
int max_order, // should be <= 32 int max_order, // should be <= 32
int taskCount2, // tasks per window function, should be <= max_order
int partCount // should be <= blockDim? int partCount // should be <= blockDim?
) )
{ {
__shared__ struct { __shared__ struct {
FlaCudaSubframeData task; FlaCudaSubframeData task;
union
{
volatile float parts[256];
volatile int tmpi[256];
};
volatile float lpc[33*16];
volatile float ldr[32]; volatile float ldr[32];
volatile int bits[32];
volatile float autoc[33];
volatile float gen0[32];
volatile float gen1[32]; volatile float gen1[32];
volatile float parts[128]; volatile float autoc[33];
volatile float error[64];
volatile float order[64];
//volatile float reff[32]; //volatile float reff[32];
//int cbits; //int cbits;
} shared; } shared;
const int tid = threadIdx.x; const int tid = threadIdx.x + threadIdx.y * 32;
// fetch task data // fetch task data
if (tid < sizeof(shared.task) / sizeof(int)) if (tid < sizeof(shared.task) / sizeof(int))
((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.x * max_order + blockIdx.y * taskCount))[tid]; ((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y * taskCount))[tid];
__syncthreads(); __syncthreads();
// add up parts // add up autocorrelation parts
for (int order = 0; order <= max_order; order++) for (int order = threadIdx.y; order <= max_order; order += 8)
{ {
shared.parts[tid] = tid < partCount ? autoc[((blockIdx.y * gridDim.x + blockIdx.x) * partCount + tid) * (max_order + 1) + order] : 0; shared.parts[tid] = 0.0f;
__syncthreads(); for (int pos = threadIdx.x; pos < partCount; pos += 32)
if (tid < 64 && blockDim.x > 64) shared.parts[tid] += shared.parts[tid + 64]; shared.parts[tid] += autoc[((blockIdx.y * gridDim.x + blockIdx.x) * partCount + pos) * (max_order + 1) + order];
__syncthreads(); shared.parts[tid] = shared.parts[tid] + shared.parts[tid + 8] + shared.parts[tid + 16] + shared.parts[tid + 24];
if (tid < 32) shared.parts[tid] = shared.parts[tid] + shared.parts[tid + 2] + shared.parts[tid + 4] + shared.parts[tid + 6];
{ if (threadIdx.x == 0)
if (blockDim.x > 32) shared.parts[tid] += shared.parts[tid + 32]; shared.autoc[order] = shared.parts[tid] + shared.parts[tid + 1];
shared.parts[tid] += shared.parts[tid + 16];
shared.parts[tid] += shared.parts[tid + 8];
shared.parts[tid] += shared.parts[tid + 4];
shared.parts[tid] += shared.parts[tid + 2];
shared.parts[tid] += shared.parts[tid + 1];
if (tid == 0)
shared.autoc[order] = shared.parts[0];
}
} }
__syncthreads();
if (tid < 32) // Compute LPC using Schur and Levinson-Durbin recursion
if (threadIdx.y == 0)
{ {
shared.gen0[tid] = shared.autoc[tid+1]; float gen0 = shared.gen1[tid] = shared.autoc[tid+1];
shared.gen1[tid] = shared.autoc[tid+1];
shared.ldr[tid] = 0.0f; shared.ldr[tid] = 0.0f;
float error = shared.autoc[0]; float error = shared.autoc[0];
for (int order = 0; order < max_order; order++) for (int order = 0; order < max_order; order++)
{ {
// Schur recursion // Schur recursion
float reff = -shared.gen1[0] / error; float reff = -shared.gen1[0] / error;
//if (tid == 0) shared.reff[order] = reff; //if (tid == 0) shared.reff[order] = reff;
error += __fmul_rz(shared.gen1[0], reff); error += shared.gen1[0] * reff;
//error *= (1 - reff * reff);
if (tid < max_order - 1 - order) if (tid < max_order - 1 - order)
{ {
float g1 = shared.gen1[tid + 1] + __fmul_rz(reff, shared.gen0[tid]); float gen1 = shared.gen1[tid + 1] + reff * gen0;
float g0 = __fmul_rz(shared.gen1[tid + 1], reff) + shared.gen0[tid]; gen0 += shared.gen1[tid + 1] * reff;
shared.gen1[tid] = g1; shared.gen1[tid] = gen1;
shared.gen0[tid] = g0; }
// Levinson-Durbin recursion
shared.ldr[tid] += (tid < order) * reff * shared.ldr[order - 1 - tid] + (tid == order) * reff;
shared.lpc[((order * (order + 1)) >> 1) + tid] = -shared.ldr[tid];
shared.error[order] = error;
}
shared.order[tid] = tid < max_order ? tid : max_order - 1;
shared.order[tid + 32] = 0;
if (taskCount2 < max_order)
{
// Select best orders based on something similar to Schwartz's Criterion
shared.error[tid] = tid < max_order ? __logf(shared.error[tid]) + (tid * 0.01f) : __logf(shared.error[0]) + 1;
shared.error[tid + 32] = __logf(shared.error[0]) + 1;
for(int size = 2; size < 32; size <<= 1){
//Bitonic merge
int ddd = (threadIdx.x & (size / 2)) == 0;
for(int stride = size / 2; stride > 0; stride >>= 1){
int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1));
if ((shared.error[pos] >= shared.error[pos + stride]) == ddd)
{
float t = shared.error[pos];
shared.error[pos] = shared.error[pos + stride];
shared.error[pos + stride] = t;
int t1 = shared.order[pos];
shared.order[pos] = shared.order[pos + stride];
shared.order[pos + stride] = t1;
}
}
} }
// Levinson-Durbin recursion //ddd == dir for the last bitonic merge step
shared.ldr[tid] += (tid < order) * __fmul_rz(reff, shared.ldr[order - 1 - tid]) + (tid == order) * reff; {
for(int stride = 16; stride > 0; stride >>= 1){
int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1));
if (shared.error[pos] >= shared.error[pos + stride])
{
float t = shared.error[pos];
shared.error[pos] = shared.error[pos + stride];
shared.error[pos + stride] = t;
int t1 = shared.order[pos];
shared.order[pos] = shared.order[pos + stride];
shared.order[pos + stride] = t1;
}
}
}
// float l1 = shared.error[tid];
// #pragma unroll 0
// for (int sh = 4; sh >= 0; sh --)
// {
//float l2 = shared.error[threadIdx.x + (1 << sh)];
//shared.order[threadIdx.x] = shared.order[threadIdx.x + ((l2 < l1) << sh)];
//shared.error[threadIdx.x] = l1 = min(l1, l2);
// }
}
}
__syncthreads();
// Quantization // Quantization
//int precision = 13 - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576); for (int i = threadIdx.y; i < taskCount2; i += 8)
int precision = max(3, min(min(13 - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576), shared.task.abits), __clz(order) + 1 - shared.task.abits)); //for (int precision = 0; precision < 1; precision++)//precisions; precision++)
int taskNo = blockIdx.x * max_order + blockIdx.y * taskCount + order; {
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.ldr[tid]) * (1 << 15))) - precision), tid <= order); int order = shared.order[i];
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]); float lpc = threadIdx.x <= order ? shared.lpc[((order * (order + 1)) >> 1) + order - threadIdx.x] : 0.0f;
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]); // get 15 bits of each coeff
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 4]); int coef = __float2int_rn(lpc * (1 << 15));
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 2]); // remove sign bits
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 1]); shared.tmpi[tid] = coef ^ (coef >> 31);
int sh = max(0,min(15, 15 - shared.bits[0])); // OR reduction
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24];
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6];
//SUM32(shared.tmpi,tid,|=);
// choose precision
//int cbits = max(3, min(10, 5 + (shared.task.abits >> 1))); // - __float2int_rn(shared.PE[order - 1])
int cbits = max(3, min(min(13 - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576), shared.task.abits), __clz(order) + 1 - shared.task.abits));
// calculate shift based on precision and number of leading zeroes in coeffs
int shift = max(0,min(15, __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]) - 18 + cbits));
//if (shared.task.abits + 32 - __clz(order) < shift
//int shift = max(0,min(15, (shared.task.abits >> 2) - 14 + __clz(shared.tmpi[threadIdx.x & ~31]) + ((32 - __clz(order))>>1)));
// quantize coeffs with given shift
coef = max(-(1 << (cbits - 1)), min((1 << (cbits - 1)) -1, __float2int_rn(lpc * (1 << shift))));
// error correction
//shared.tmp[threadIdx.x] = (threadIdx.x != 0) * (shared.arp[threadIdx.x - 1]*(1 << shared.task.shift) - shared.task.coefs[threadIdx.x - 1]);
//shared.task.coefs[threadIdx.x] = max(-(1 << (shared.task.cbits - 1)), min((1 << (shared.task.cbits - 1))-1, __float2int_rn((shared.arp[threadIdx.x]) * (1 << shared.task.shift) + shared.tmp[threadIdx.x])));
// remove sign bits
shared.tmpi[tid] = coef ^ (coef >> 31);
// OR reduction
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24];
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6];
//SUM32(shared.tmpi,tid,|=);
// calculate actual number of bits (+1 for sign)
cbits = 1 + 32 - __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]);
// reverse coefs // output shift, cbits and output coeffs
int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(-shared.ldr[order - tid] * (1 << sh)))); int taskNo = blockIdx.y * taskCount + blockIdx.x * taskCount2 + i;
if (tid <= order) if (threadIdx.x == 0)
tasks[taskNo].coefs[tid] = coef; tasks[taskNo].data.shift = shift;
if (tid == 0) if (threadIdx.x == 0)
tasks[taskNo].data.shift = sh;
shared.bits[tid] = __mul24(33 - __clz(coef ^ (coef >> 31)), tid <= order);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 4]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 2]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 1]);
int cbits = shared.bits[0];
if (tid == 0)
tasks[taskNo].data.cbits = cbits; tasks[taskNo].data.cbits = cbits;
if (threadIdx.x == 0)
tasks[taskNo].data.residualOrder = order + 1;
if (threadIdx.x <= order)
tasks[taskNo].coefs[threadIdx.x] = coef;
} }
} }
// cudaQuantizeLPC
//
// Ranks candidate LPC orders by a log-error score, then quantizes the
// coefficients of the best-ranked candidates and stores shift, cbits,
// residualOrder and coefs into the task records.
//
// Parameters:
//   tasks        - task array. The group handled by blockIdx.y reads its header from
//                  tasks[blockIdx.y * taskCount] and writes results to
//                  tasks[blockIdx.y * taskCount + i] for i < taskCountLPC.
//   taskCount    - stride (in tasks) between consecutive blockIdx.y groups.
//   taskCountLPC - number of LPC task records filled per group.
//   windowCount  - number of coefficient sets per group; set index is threadIdx.y.
//   lpcs         - per coefficient set, (max_order + 1) rows of 32 floats. As indexed
//                  below, row `order` holds the coefficients for that order and row
//                  max_order holds the per-order prediction error estimates.
//   max_order    - number of orders considered per set.
//
// NOTE(review): tid = threadIdx.x + threadIdx.y * 32 and the loops stepping threadIdx.y
// by 8 suggest a 32x8 thread block - the launch configuration is not visible here, confirm.
extern "C" __global__ void cudaQuantizeLPC(
FlaCudaSubframeTask *tasks,
int taskCount, // tasks per block
int taskCountLPC, // LPC tasks per block
int windowCount, // sets of coeffs per block
float*lpcs,
int max_order // should be <= 32
)
{
// Block-wide scratch: a copy of the task header, an integer scratch array for the
// OR reductions, and parallel (order, error) arrays that are sorted together.
__shared__ struct {
FlaCudaSubframeData task;
volatile int tmpi[256];
volatile int order[256];
volatile float error[256];
} shared;
// Flattened thread index: 32 consecutive values per threadIdx.y row.
const int tid = threadIdx.x + threadIdx.y * 32;
// fetch task data
// The header is copied one int per thread from the first task of this group.
if (tid < sizeof(shared.task) / sizeof(int))
((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y * taskCount))[tid];
__syncthreads();
// Each slot encodes a candidate as (order index) + 32 * (set index); both parts are
// clamped so that every slot refers to an existing order/set.
shared.order[tid] = min(max_order - 1, threadIdx.x) + min(threadIdx.y, windowCount - 1) * 32;
// Large placeholder score; slots that are not overwritten below keep it.
shared.error[tid] = 10000.0f + shared.order[tid];
{
// Start of this thread's coefficient set inside lpcs.
int lpcs_offs = (threadIdx.y + blockIdx.y * windowCount) * (max_order + 1) * 32;
// Select best orders based on Akaike's Criteria
// Load prediction error estimates
// Score = log(error estimate) plus 0.01 per order index, so higher orders are penalized.
if (threadIdx.y < windowCount && threadIdx.x < max_order)
shared.error[tid] = __logf(lpcs[lpcs_offs + max_order * 32 + threadIdx.x]) + (threadIdx.x * 0.01f);
__syncthreads();
// Sort using bitonic sort
// error[] is the key and order[] is swapped alongside it. Merge sizes stop below 64,
// and the final pass below uses stride 32.
// NOTE(review): every thread takes part and pos = 2 * tid - (tid & (stride - 1)), so
// pos + stride goes past index 255 once tid >= 128. If the block really has 256
// threads this indexes beyond error[]/order[] - confirm the intended block shape or
// whether a tid guard is missing.
for(int size = 2; size < 64; size <<= 1){
//Bitonic merge
// ddd is the compare direction used by this thread for the current merge size.
int ddd = (tid & (size / 2)) == 0;
for(int stride = size / 2; stride > 0; stride >>= 1){
__syncthreads();
int pos = 2 * tid - (tid & (stride - 1));
if ((shared.error[pos] >= shared.error[pos + stride]) == ddd)
{
float t = shared.error[pos];
shared.error[pos] = shared.error[pos + stride];
shared.error[pos + stride] = t;
int t1 = shared.order[pos];
shared.order[pos] = shared.order[pos + stride];
shared.order[pos + stride] = t1;
}
}
}
//ddd == dir for the last bitonic merge step
// Final merge pass: always moves the smaller score to the lower index.
{
for(int stride = 32; stride > 0; stride >>= 1){
__syncthreads();
int pos = 2 * tid - (tid & (stride - 1));
if (shared.error[pos] >= shared.error[pos + stride])
{
float t = shared.error[pos];
shared.error[pos] = shared.error[pos + stride];
shared.error[pos + stride] = t;
int t1 = shared.order[pos];
shared.order[pos] = shared.order[pos + stride];
shared.order[pos + stride] = t1;
}
}
}
}
__syncthreads();
// Quantization
// Each threadIdx.y row handles candidates i, i + 8, i + 16, ...; within a row,
// threadIdx.x selects the coefficient.
// NOTE(review): the reductions on shared.tmpi below have no barrier between the store
// and the loads of neighbouring slots; this appears to rely on the 32 threads of a row
// executing in lockstep (tmpi is volatile) - confirm for the target hardware.
for (int i = threadIdx.y; i < taskCountLPC; i += 8)
//for (int precision = 0; precision < 1; precision++)//precisions; precision++)
{
// Decode the i-th ranked candidate: low 5 bits = order index, upper bits = set index.
int order = shared.order[i] & 31;
int lpcs_offs = ((shared.order[i] >> 5) + blockIdx.y * windowCount) * (max_order + 1) * 32;
// Coefficients are read in reverse (index order - threadIdx.x); threads beyond the order use 0.
float lpc = threadIdx.x <= order ? lpcs[lpcs_offs + order * 32 + order - threadIdx.x] : 0.0f;
// get 15 bits of each coeff
int coef = __float2int_rn(lpc * (1 << 15));
// remove sign bits
// coef ^ (coef >> 31) leaves non-negative values unchanged and bit-inverts negative ones.
shared.tmpi[tid] = coef ^ (coef >> 31);
// OR reduction
// After these two steps, slots 0 and 1 of the row together hold the OR of all 32 values.
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24];
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6];
//SUM32(shared.tmpi,tid,|=);
// choose precision
//int cbits = max(3, min(10, 5 + (shared.task.abits >> 1))); // - __float2int_rn(shared.PE[order - 1])
// Precision target: starts from 13 minus one for each blocksize threshold met, is
// limited by abits and by __clz(order) + 1 - abits, and is never below 3.
int cbits = max(3, min(min(13 - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576), shared.task.abits), __clz(order) + 1 - shared.task.abits));
// calculate shift based on precision and number of leading zeroes in coeffs
// The result is clamped to the range 0..15.
int shift = max(0,min(15, __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]) - 18 + cbits));
//if (shared.task.abits + 32 - __clz(order) < shift
//int shift = max(0,min(15, (shared.task.abits >> 2) - 14 + __clz(shared.tmpi[threadIdx.x & ~31]) + ((32 - __clz(order))>>1)));
// quantize coeffs with given shift
// The rounded value is clamped to the signed range representable in cbits bits.
coef = max(-(1 << (cbits - 1)), min((1 << (cbits - 1)) -1, __float2int_rn(lpc * (1 << shift))));
// error correction
//shared.tmp[threadIdx.x] = (threadIdx.x != 0) * (shared.arp[threadIdx.x - 1]*(1 << shared.task.shift) - shared.task.coefs[threadIdx.x - 1]);
//shared.task.coefs[threadIdx.x] = max(-(1 << (shared.task.cbits - 1)), min((1 << (shared.task.cbits - 1))-1, __float2int_rn((shared.arp[threadIdx.x]) * (1 << shared.task.shift) + shared.tmp[threadIdx.x])));
// remove sign bits
shared.tmpi[tid] = coef ^ (coef >> 31);
// OR reduction
// Same reduction as above, now over the quantized coefficients.
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24];
shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6];
//SUM32(shared.tmpi,tid,|=);
// calculate actual number of bits (+1 for sign)
cbits = 1 + 32 - __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]);
// output shift, cbits and output coeffs
// The scalar fields are written by the first thread of the row only; each thread
// up to `order` writes its own coefficient.
int taskNo = blockIdx.y * taskCount + i;
if (threadIdx.x == 0)
tasks[taskNo].data.shift = shift;
if (threadIdx.x == 0)
tasks[taskNo].data.cbits = cbits;
if (threadIdx.x == 0)
tasks[taskNo].data.residualOrder = order + 1;
if (threadIdx.x <= order)
tasks[taskNo].coefs[threadIdx.x] = coef;
}
} }
extern "C" __global__ void cudaComputeLPCLattice( extern "C" __global__ void cudaComputeLPCLattice(
@@ -632,6 +832,8 @@ extern "C" __global__ void cudaEstimateResidual(
output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3])); output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3]));
} }
#define FASTMUL(a,b) __mul24(a,b)
extern "C" __global__ void cudaEstimateResidual8( extern "C" __global__ void cudaEstimateResidual8(
int*output, int*output,
int*samples, int*samples,
@@ -705,14 +907,14 @@ extern "C" __global__ void cudaEstimateResidual12(
) )
{ {
__shared__ struct { __shared__ struct {
int data[32*9]; volatile int data[32*9];
volatile int residual[32*8]; volatile int residual[32*8];
FlaCudaSubframeData task[8]; FlaCudaSubframeData task[8];
int coefs[8*32]; int coefs[8*32];
} shared; } shared;
const int tid = threadIdx.x + threadIdx.y * 32; const int tid = threadIdx.x + threadIdx.y * 32;
if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int)) if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int))
((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[blockIdx.y * blockDim.y + threadIdx.y]))[threadIdx.x]; ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y]))[threadIdx.x];
__syncthreads(); __syncthreads();
const int pos = blockIdx.x * partSize; const int pos = blockIdx.x * partSize;
const int dataLen = min(frameSize - pos, partSize + max_order); const int dataLen = min(frameSize - pos, partSize + max_order);
@@ -723,29 +925,30 @@ extern "C" __global__ void cudaEstimateResidual12(
__syncthreads(); __syncthreads();
shared.residual[tid] = 0; const int ro = shared.task[threadIdx.y].residualOrder;
shared.coefs[tid] = threadIdx.x < shared.task[threadIdx.y].residualOrder ? tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0; const int residualLen = max(0,min(frameSize - pos - ro, partSize));
const int residualLen = shared.task[threadIdx.y].type == Verbatim ? 0 : max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize));
const int ptr2 = threadIdx.y << 5; const int ptr2 = threadIdx.y << 5;
shared.coefs[tid] = threadIdx.x < ro ? tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y].coefs[threadIdx.x] : 0;
int s = 0; int s = 0;
for (int ptr = threadIdx.x; ptr < residualLen; ptr += 32) for (int ptr = shared.task[threadIdx.y].type == Verbatim ? residualLen : threadIdx.x; ptr < residualLen; ptr += 32)
{ {
// compute residual // compute residual
int sum = int sum =
__mul24(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) + FASTMUL(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) +
__mul24(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) + FASTMUL(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) +
__mul24(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) + FASTMUL(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) +
__mul24(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) + FASTMUL(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) +
__mul24(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) + FASTMUL(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) +
__mul24(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) + FASTMUL(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) +
__mul24(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) + FASTMUL(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) +
__mul24(shared.data[ptr + 7], shared.coefs[ptr2 + 7]) + FASTMUL(shared.data[ptr + 7], shared.coefs[ptr2 + 7]) +
__mul24(shared.data[ptr + 8], shared.coefs[ptr2 + 8]) + FASTMUL(shared.data[ptr + 8], shared.coefs[ptr2 + 8]) +
__mul24(shared.data[ptr + 9], shared.coefs[ptr2 + 9]) + FASTMUL(shared.data[ptr + 9], shared.coefs[ptr2 + 9]) +
__mul24(shared.data[ptr + 10], shared.coefs[ptr2 + 10]) + FASTMUL(shared.data[ptr + 10], shared.coefs[ptr2 + 10]) +
__mul24(shared.data[ptr + 11], shared.coefs[ptr2 + 11]); FASTMUL(shared.data[ptr + 11], shared.coefs[ptr2 + 11]);
sum = shared.data[ptr + shared.task[threadIdx.y].residualOrder] - (sum >> shared.task[threadIdx.y].shift); sum = shared.data[ptr + ro] - (sum >> shared.task[threadIdx.y].shift);
s += min(0x7fffff,(sum << 1) ^ (sum >> 31)); s += min(0x7fffff,(sum << 1) ^ (sum >> 31));
} }
@@ -756,7 +959,7 @@ extern "C" __global__ void cudaEstimateResidual12(
// rice parameter search // rice parameter search
shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) * shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) *
(__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x)); (__mul24(threadIdx.x >= 15, 0x7fffff) + FASTMUL(residualLen, threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x));
shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12])); shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12]));
if (threadIdx.x == 0) if (threadIdx.x == 0)
output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3])); output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3]));

View File

@@ -289,11 +289,186 @@ code {
0x10170003 0x00001280 0xf0000001 0xe0000001 0x10170003 0x00001280 0xf0000001 0xe0000001
} }
} }
code {
name = cudaQuantizeLPC
lmem = 0
smem = 3176
reg = 13
bar = 1
const {
segname = const
segnum = 1
offset = 0
bytes = 56
mem {
0x000003ff 0x0000000f 0x3c23d70a 0x00000001
0xffffffff 0x0000003f 0x0000001f 0x00000900
0x0000000c 0x00000480 0x00000240 0x00000003
0x00000020 0x0000009e
}
}
bincode {
0xd0800205 0x00400780 0xa0000205 0x04000780
0xa0000015 0x04000780 0x30050201 0xc4100780
0x20000a11 0x04000780 0x200a8801 0x00000003
0x00020009 0xc0000780 0x308109fd 0x644107c8
0xa0015003 0x00000000 0x30020809 0xc4100780
0x10015003 0x00000280 0x1000ca01 0x0423c780
0x40014e0d 0x00200780 0x3010060d 0xc4100780
0x60004e01 0x0020c780 0x3007000d 0xc4100780
0x30060001 0xc4100780 0x20008600 0x2100e800
0x20000401 0x04000780 0xd00e0001 0x80c00780
0x08000001 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x213fee09 0x0fffffff
0x213ff201 0x0fffffff 0x30020209 0xa4000780
0x30000a01 0xa4000780 0x30050409 0xc4100780
0x20000001 0x04008780 0x08022001 0xe4200780
0xd8088005 0x20000780 0xa400c001 0x44214780
0x3001cffd 0x642107c8 0xb0000001 0x0461c403
0x3005d3fd 0x642102c8 0x08042001 0xe4200780
0xa003b003 0x00000000 0x1003b003 0x00000100
0x1000ce01 0x0423c780 0x40014e09 0x00200780
0x30100409 0xc4100780 0x60004e09 0x00208780
0x2101f201 0x00000003 0x20000409 0x04004780
0x4005000d 0x00000780 0x6004020d 0x0000c780
0x3010060d 0xc4100780 0x60040009 0x0000c780
0x3005d201 0xc4300780 0x30050409 0xc4100780
0x20028000 0x20008a00 0x30020001 0xc4100780
0x2000d001 0x04200780 0xd00e0001 0x80c00780
0x90000001 0x60000780 0xa0000a09 0x44004780
0xc0180001 0x03f31723 0xe00a0401 0x03c23d73
0x08042001 0xe4200780 0x30010819 0xc4100782
0x1002801d 0x00000003 0x301f0e01 0xec100780
0xd0830001 0x04400780 0x20000001 0x0401c780
0x30010001 0xec100780 0x307c01fd 0x6c00c7c8
0x10064003 0x00000280 0xd0040009 0x04000780
0x307c05fd 0x6c0087c8 0x861ffe03 0x00000000
0x203f8009 0x0fffffff 0xd0040409 0x04000780
0x20428c0c 0x20008608 0x00020405 0xc0000780
0xd410d00d 0x20000780 0x1c00c009 0x0423c780
0x0002060d 0xc0000780 0xdc10d011 0x20000780
0xb002c00d 0x60218784 0x10000011 0x00000003
0xa0000615 0x08014780 0x10001011 0x20408280
0xd00509fd 0x000087d8 0xa0061003 0x00000000
0x10061003 0x00001280 0xd410d011 0x20000780
0x1000c00d 0x0423c784 0xdc10d011 0x20000780
0x1000c009 0x0423c784 0x0c043401 0xe420c780
0x04043401 0xe4208780 0xd408d011 0x20000780
0x1000c00d 0x0423c784 0xdc08d011 0x20000780
0x1000c009 0x0423c784 0x0c023401 0xe420c780
0x04023401 0xe4208780 0x30010001 0xec100782
0x307c01fd 0x6c0107d8 0x10045003 0x00001280
0x30010e1d 0xc4100780 0x30850ffd 0x6c40c7c8
0x1003d003 0x00000280 0x1020800d 0x00000003
0x861ffe03 0x00000000 0x203f8601 0x0fffffff
0xd0040001 0x04000780 0x20408c08 0x20038400
0x00020005 0xc0000780 0xd410d00d 0x20000780
0x1c00c001 0x0423c780 0x0002040d 0xc0000780
0xdc10d011 0x20000780 0xb000c1fd 0x602187cc
0xa0080003 0x00000000 0x10080003 0x00000100
0xd410d011 0x20000780 0x1000c009 0x0423c784
0xdc10d011 0x20000780 0x1000c001 0x0423c784
0x0c043401 0xe4208780 0x04043401 0xe4200780
0xd408d011 0x20000780 0x1000c009 0x0423c784
0xdc08d011 0x20000780 0x1000c001 0x0423c784
0x0c023401 0xe4208780 0x04023401 0xe4200780
0x3001060d 0xec100782 0x307c07fd 0x6c0107c8
0x10068003 0x00000280 0x861ffe03 0x00000000
0x3001cdfd 0x6c20c7c8 0x10000211 0x0403c780
0x30000003 0x00000280 0xa0004e09 0x04200780
0x1000ce01 0x0423c780 0x4004020d 0x00000780
0x200a8219 0x00000003 0x3010060d 0xc4100780
0x00020c0d 0xc0000780 0x60040019 0x0000c780
0x307c0bfd 0x640087c8 0x2101f20d 0x00000003
0xdc088005 0x20000780 0x3405c001 0xec300780
0x20000c01 0x04000780 0x40010c1d 0x00000780
0x60000e21 0x0001c780 0xd486c01d 0x04600780
0x30101021 0xc4100780 0x30050e25 0x640187e0
0x60000c01 0x00020780 0xa00013fd 0x0c0147d8
0xa00a5003 0x00000000 0x30050001 0xc4100780
0x100a4003 0x00002100 0x30050e21 0xc4100780
0x20088000 0x20008e00 0x20400001 0x04014780
0x30020001 0xc4100780 0x2000d001 0x04200780
0xd00e0021 0x80c00780 0x100a5003 0x00000780
0x1000f821 0x0403c780 0xd8010005 0x20000782
0xc0001001 0x04700003 0xa0000001 0x8c004780
0x301f0025 0xec100780 0xd0090001 0x04008780
0x08002001 0xe4200780 0x1400d025 0x0423c780
0x1800f001 0x0423c780 0xd409c025 0x04204780
0xd800e001 0x04204780 0xd0090001 0x04004780
0x08002001 0xe4200780 0x1800e401 0x0423c780
0x1800ec25 0x0423c780 0xd800e001 0x04204780
0xd809e825 0x04204780 0xd0090001 0x04004780
0x307c0ffd 0x6c0087e8 0x08002001 0xe4200780
0xa0000e01 0x44066500 0x30170001 0xec102500
0x31000025 0x04436500 0x10001825 0x2440e280
0x00070205 0xc0000780 0x1400f601 0x0423c780
0xd400f401 0x042047e0 0xa0000001 0x44066680
0x30170001 0xec102680 0x31000001 0x04436680
0x10001801 0x2440e100 0x3087e3fd 0x6c60c7e8
0x3089e22d 0x6c60c780 0x100d8029 0x00000003
0xd083162d 0x04400780 0x10001029 0x2440e280
0x308ae231 0x6c60c780 0x20401429 0x0402c780
0xd083182d 0x04400780 0x3000ec25 0x04224780
0x20401429 0x0402c780 0x20019225 0x00000003
0x300aec29 0xac200780 0x300a1225 0xac000780
0x308b1229 0x8c400780 0x20000001 0x04028780
0x202e8001 0x0fffffff 0x30810025 0xac400780
0x10018001 0x00000003 0x307c1225 0x8c000780
0x3009002d 0xc4000780 0x203f9429 0x0fffffff
0xa000162d 0x44014780 0x300a0001 0xc4000780
0xc0081621 0x00000780 0x203f8029 0x0fffffff
0xa0001021 0x8c004780 0x300a1021 0xac000780
0x30008001 0x00000003 0x30001021 0x8c000780
0x301f1001 0xec100780 0xd0001001 0x04008780
0x08002001 0xe4200780 0xd8010005 0x20000780
0x1400d029 0x0423c780 0x1800f001 0x0423c780
0xd40ac029 0x04204780 0xd800e001 0x04204780
0xd00a0001 0x04004780 0x08002001 0xe4200780
0x1800e401 0x0423c780 0x1800ec29 0x0423c780
0xd800e001 0x04204780 0xd80ae829 0x04204780
0xd00a0001 0x04004780 0x08002001 0xe4200780
0x00070205 0xc0000780 0x1400f601 0x0423c780
0xd400f401 0x042047e0 0xa0000001 0x44066680
0x30170001 0xec102680 0x31000029 0x04436680
0x10001829 0x2440e100 0xa011e003 0x00000000
0x1011e003 0x00000100 0x1000ca01 0x0423c780
0x4005002d 0x00000780 0x6004022d 0x0002c780
0x3010162d 0xc4100780 0x60040001 0x0002c780
0x2000002d 0x04010780 0x30071601 0xc4100780
0x3006162d 0xc4100780 0x200b8000 0x2100e800
0x20088001 0x00000003 0xd00e0025 0xa0c00780
0x1011e003 0x00000100 0x1000ca01 0x0423c780
0x40050025 0x00000780 0x60040225 0x00024780
0x30101225 0xc4100780 0x60040001 0x00024780
0x20000001 0x04010780 0x30070025 0xc4100780
0x30060001 0xc4100780 0x20009200 0x2100e800
0x30219425 0x00000003 0x200c8001 0x00000003
0xd00e0025 0xa0c00780 0x1011e003 0x00000100
0x1000ca01 0x0423c780 0x40050025 0x00000780
0x60040225 0x00024780 0x30101225 0xc4100780
0x60040001 0x00024780 0x20000001 0x04010780
0x30070025 0xc4100780 0x30060001 0xc4100780
0x20001201 0x04000780 0x20018e1d 0x00000003
0x2000c801 0x04200780 0xd00e001d 0xa0c00780
0xf0000001 0xe0000002 0xa012e003 0x00000000
0x1012e003 0x00001100 0x1000ca01 0x0423c780
0x4005001d 0x00000780 0x6004021d 0x0001c780
0x30100e1d 0xc4100780 0x60040001 0x0001c780
0x20000001 0x04010780 0x3007001d 0xc4100780
0x30060001 0xc4100780 0x20000e1d 0x04000780
0x30020a01 0xc4100780 0x2107e81c 0x20078000
0x20008001 0x00000007 0xd00e0021 0xa0c00780
0xdc00400d 0x20000782 0x20088811 0x00000003
0x3004cdfd 0x6c2107d8 0x10090003 0x00001280
0xf0000001 0xe0000001
}
}
code { code {
name = cudaComputeAutocor name = cudaComputeAutocor
lmem = 0 lmem = 0
smem = 3312 smem = 3328
reg = 9 reg = 7
bar = 1 bar = 1
const { const {
segname = const segname = const
@@ -307,79 +482,84 @@ code {
bincode { bincode {
0x10000005 0x0403c780 0xd0800601 0x00400780 0x10000005 0x0403c780 0xd0800601 0x00400780
0xa0000001 0x04000780 0xa0000415 0x04000780 0xa0000001 0x04000780 0xa0000415 0x04000780
0x30050005 0xc4100780 0x20000a09 0x04004780 0x30050005 0xc4100780 0x20000a0d 0x04004780
0x308105fd 0x644107c8 0xa001c003 0x00000000 0x308107fd 0x644107c8 0xa0015003 0x00000000
0x3002040d 0xc4100780 0x1001c003 0x00000280 0x30020611 0xc4100780 0x10015003 0x00000280
0x10018011 0x00000003 0x1000d205 0x0423c780 0xa0004e09 0x04200780 0x1000d205 0x0423c780
0x30010819 0xc4000780 0xa0004e11 0x04200780 0x30010405 0xe4000780 0x4001d405 0x00218780
0x203f8c19 0x0fffffff 0x30010805 0xe4000780 0x30070209 0xc4100780 0x30060205 0xc4100780
0xd0060811 0x04000780 0x4141f404 0x4144f010 0x20018404 0x2101ee04 0x20000805 0x04004780
0x30070219 0xc4100780 0x3006021d 0xc4100780 0xd00e0205 0x80c00780 0x00000805 0xc0000780
0x30070805 0xc4100780 0x30060811 0xc4100780 0x04061601 0xe4204780 0x307c07fd 0x6c0147ca
0x20078c18 0x20048204 0x2106ee10 0x20018604 0xa002e003 0x00000000 0x1002e003 0x00000280
0x20000205 0x04010780 0xd00e0205 0x80c00780 0xa0004c09 0x04200780 0x10018019 0x00000003
0x00000605 0xc0000780 0x04061601 0xe4204780 0x1000d205 0x0423c780 0x404f8409 0x00000003
0x30010c19 0xc4000780 0x30050405 0xc4100780
0xa0004e09 0x04200780 0x203f8c19 0x0fffffff
0x00067801 0xe4204780 0xd019e009 0x20000780
0xd0060409 0x04000780 0xd0186005 0x20000780
0x1800c005 0x0423c780 0x6402cc05 0x80204780
0x1400cc09 0x0423c780 0x00067c01 0xe4204780
0x1900e004 0x2501e004 0x00067e01 0xe4204780
0x2120f019 0x0000001f 0x3800c005 0x04208780
0x30060205 0xac000780 0x00067a01 0xe4204780
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa0004c11 0x04200780 0x404f8805 0x00000003 0xd019e805 0x20000780 0x3403c1fd 0x6c20c7c8
0xd0189005 0x20000780 0x30050205 0xc4100780 0xa0040003 0x00000000 0x1003f003 0x00000280
0x2120f019 0x0000001f 0x2440c01d 0x04204780 0xd019f005 0x20000780 0x2503e204 0x2503e008
0x30070c19 0xac000780 0x30020dfd 0x6c00c7c8 0x30020205 0xc4100780 0x30020419 0xc4100780
0xa0035003 0x00000000 0x10034003 0x00000280 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
0xd0186005 0x20000780 0x2002821c 0x2507e020 0x2000cc05 0x04218780 0xd00e0205 0x80c00780
0x2400da1d 0x0421c780 0x30021021 0xc4100780 0xa0000409 0x44014780 0xc0010405 0x00000780
0x30020e1d 0xc4100780 0x2000ca21 0x04220780 0x10040003 0x00000780 0x1000f805 0x0403c780
0xd00e1021 0x80c00780 0x2000cc1d 0x0421c780 0x00000805 0xc0000782 0x04001601 0xe4204780
0xd00e0e1d 0x80c00780 0xa0001021 0x44014780 0xd019e805 0x20000780 0x20008605 0x00000013
0xc007101d 0x00000780 0x10035003 0x00000780 0x3401c1fd 0x6c20c7c8 0x00020205 0xc0000780
0x1000f81d 0x0403c780 0x00000605 0xc0000782 0xa0056003 0x00000000 0x10055003 0x00000280
0x20008421 0x00000013 0x30080dfd 0x6c00c7c8 0xd019f009 0x20000780 0x2903e204 0x2903e008
0x04001601 0xe421c780 0xa0049003 0x00000000 0x20008205 0x00000013 0x20008409 0x00000013
0x10048003 0x00000280 0x20000205 0x04008780 0x30020205 0xc4100780 0x30020419 0xc4100780
0xd0186005 0x20000780 0x2501e018 0x2501fa1c 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
0x30020c05 0xc4100780 0x30020e19 0xc4100780 0x2000cc05 0x04218780 0xd00e0205 0x80c00780
0x2101ea04 0x2106ec1c 0x20008205 0x00000043 0xa0000409 0x44014780 0xc0010405 0x00000780
0xd00e0219 0x80c00780 0x20008e05 0x00000043 0x10056003 0x00000780 0x1000f805 0x0403c780
0xd00e0205 0x80c00780 0xa0000c19 0x44014780 0x04001601 0xe4204782 0x861ffe03 0x00000000
0xc0010c05 0x00000780 0x10049003 0x00000780 0x3000d1fd 0x6c2047c8 0xa0090003 0x00000000
0x1000f805 0x0403c780 0x00000605 0xc0000782 0x10090003 0x00000280 0x307c0bfd 0x640087c8
0x04021601 0xe4204780 0x861ffe03 0x00000000 0x404f8a09 0x00000003 0x20000405 0x04000780
0x3000d1fd 0x6c2047c8 0xa0084003 0x00000000 0x00020209 0xc0000780 0x00020405 0xc0000780
0x10084003 0x00000280 0x307c0bfd 0x640087c8 0x1900f814 0x1900f604 0xc405d815 0x00200780
0x404f8a15 0x00000003 0x20000a05 0x04000780 0xe401d615 0x00214780 0x1800da05 0x0423c780
0x00020209 0xc0000780 0x00020a05 0xc0000780 0xe401da15 0x00214780 0x1800dc05 0x0423c780
0x1900f818 0x1900f604 0xc406d819 0x00200780 0xe401dc15 0x00214780 0x1800de05 0x0423c780
0xe401d619 0x00218780 0x1800da05 0x0423c780 0xe401de15 0x00214780 0x1800e005 0x0423c780
0xe401da19 0x00218780 0x1800dc05 0x0423c780 0xe401e015 0x00214780 0x1800e205 0x0423c780
0xe401dc19 0x00218780 0x1800de05 0x0423c780 0xe401e215 0x00214780 0x1800e405 0x0423c780
0xe401de19 0x00218780 0x1800e005 0x0423c780 0xe401e415 0x00214780 0x1800e605 0x0423c780
0xe401e019 0x00218780 0x1800e205 0x0423c780 0xe401e615 0x00214780 0x1800e805 0x0423c780
0xe401e219 0x00218780 0x1800e405 0x0423c780 0xe401e815 0x00214780 0x1800ea05 0x0423c780
0xe401e419 0x00218780 0x1800e605 0x0423c780 0xe401ea15 0x00214780 0x1800ec05 0x0423c780
0xe401e619 0x00218780 0x1800e805 0x0423c780 0xe401ec15 0x00214780 0x1800ee05 0x0423c780
0xe401e819 0x00218780 0x1800ea05 0x0423c780 0xe401ee15 0x00214780 0x1800f005 0x0423c780
0xe401ea19 0x00218780 0x1800ec05 0x0423c780 0xe401f015 0x00214780 0x1800f205 0x0423c780
0xe401ec19 0x00218780 0x1800ee05 0x0423c780 0xe401f205 0x00214780 0x00000805 0xc0000780
0xe401ee19 0x00218780 0x1800f005 0x0423c780
0xe401f019 0x00218780 0x1800f205 0x0423c780
0xe401f205 0x00218780 0x00000605 0xc0000780
0x04041601 0xe4204780 0xd4105809 0x20000780 0x04041601 0xe4204780 0xd4105809 0x20000780
0x1900f004 0xb9016004 0xb800e005 0x00204780 0x1900f004 0xb9016004 0xb800e005 0x00204780
0xb800f005 0x00204780 0x04041601 0xe4204780 0xb800f005 0x00204780 0x04041601 0xe4204780
0x1900e404 0xb9016004 0xb9016804 0xb9016c04 0x1900e404 0xb9016004 0xb9016804 0xb9016c04
0x04041601 0xe4204780 0x00000605 0xc0000680 0x04041601 0xe4204780 0x00000805 0xc0000680
0xd4105809 0x20000680 0x1800c205 0x0423c680 0xd4105809 0x20000680 0x1800c205 0x0423c680
0x00020005 0xc0000680 0xb800c005 0x00204680 0x00020005 0xc0000680 0xb800c005 0x00204680
0x04063601 0xe4204680 0x20088001 0x00000003 0x04063601 0xe4204680 0x20088001 0x00000003
0x3000d1fd 0x6c2187d8 0x10051003 0x00001280 0x3000d1fd 0x6c2187d8 0x1005d003 0x00001280
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x3002d1fd 0x6c2047c8 0x30000003 0x00000280 0x3003d1fd 0x6c2047c8 0x30000003 0x00000280
0x10004e01 0x0023c780 0x60004801 0x00210780 0xa0004e01 0x04200780 0xa0004805 0x04200780
0x2101f005 0x00000003 0x40030011 0x00000780 0x40010005 0x00018780 0xa0004c01 0x04200780
0x60020211 0x00010780 0x30100811 0xc4100780 0x20000001 0x04004780 0x2101f005 0x00000003
0x60020001 0x00010780 0x00000605 0xc0000780 0x00000805 0xc0000780 0x60010001 0x8000c780
0x20000001 0x04008780 0xd418d805 0x20000780 0xd418d805 0x20000780 0x30020005 0xc4100780
0x30020005 0xc4100780 0x1500e000 0x2101e804 0x1500e000 0x2101e804 0xd00e0201 0xa0c00781
0xd00e0201 0xa0c00781
} }
} }
code { code {
@@ -478,97 +658,94 @@ code {
} }
bincode { bincode {
0xd0800205 0x00400780 0xa000001d 0x04000780 0xd0800205 0x00400780 0xa000001d 0x04000780
0xa0000219 0x04000780 0x30810ffd 0x644107c8 0xa0000215 0x04000780 0x30810ffd 0x644107c8
0xa0012003 0x00000000 0x30060c09 0xc4100780 0xa0013003 0x00000000 0x30060a19 0xc4100780
0x10012003 0x00000280 0x10004401 0x0023c780 0x10013003 0x00000280 0xa0004e01 0x04200780
0x60004e01 0x00218780 0x30070005 0xc4100780 0xa0004405 0x04200780 0x40418000 0x20008a00
0x30060001 0xc4100780 0x20000205 0x04000780 0x30070005 0xc4100780 0x30060001 0xc4100780
0x30020e01 0xc4100780 0x2101ec0c 0x20008404 0x20000205 0x04000780 0x30020e01 0xc4100780
0x20000001 0x0400c780 0xd00e0001 0x80c00780 0x2101ec08 0x20008c04 0x20000001 0x04008780
0x00000205 0xc0000780 0x04045401 0xe4200780 0xd00e0001 0x80c00780 0x00000205 0xc0000780
0x04045401 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0004c11 0x04200780
0x1000d201 0x0423c780 0x40080205 0x00000780
0x30100209 0xc4100780 0x1000d205 0x0423c780
0x60080009 0x00008780 0x30050a01 0xc4100780
0x2101ee0c 0x2142f004 0x20000021 0x0401c780
0x3003020d 0xac000780 0x300807fd 0x6c00c7c8
0xa002b003 0x00000000 0x1002a003 0x00000280
0xd0115805 0x20000780 0x2502e000 0x20009000
0x30020001 0xc4100780 0x2000ca01 0x04200780
0xd00e0025 0x80c00780 0x1400d401 0x0423c780
0x30001201 0xec000780 0x1002b003 0x00000780
0x1000f801 0x0403c780 0x308211fd 0x6c4107ca
0x00021009 0xc0000780 0x08001401 0xe4200780
0xa0040003 0x00000000 0x10040003 0x00000280
0x2000d201 0x04220780 0x300007fd 0x6c00c7c8
0x00020005 0xc0000780 0xa003f003 0x00000000
0x1003e003 0x00000280 0xd0115809 0x20000780
0x2108f20c 0x2902e000 0x20000001 0x0400c780
0x30020001 0xc4100780 0x2000ca01 0x04200780
0xd00e0009 0x80c00780 0x1800d401 0x0423c780
0x30000401 0xec000780 0x1003f003 0x00000780
0x1000f801 0x0403c780 0x04001401 0xe4200782
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa0004c11 0x04200780 0x1000d201 0x0423c780 0x00000c09 0xc0000780 0xd8115005 0x20000780
0x40080205 0x00000780 0x3010020d 0xc4100780 0x3407c1fd 0x6420c7c8 0xa0054003 0x00000000
0x1000d205 0x0423c780 0x6008000d 0x0000c780 0x1400c001 0x0423c780 0x10053003 0x00000280
0x30050c01 0xc4100780 0x2101ee14 0x2143f004 0xa0004e09 0x04200780 0xa000440d 0x04200780
0x20000021 0x0401c780 0x30050215 0xac000780 0x40438408 0x20028a08 0x3007040d 0xc4100780
0x30080bfd 0x6c00c7c8 0xa002a003 0x00000000 0x30060409 0xc4100780 0x20000609 0x04008780
0x10029003 0x00000280 0xd0115805 0x20000780 0x30020e0d 0xc4100780 0x2102ec08 0x20028608
0x2503e000 0x20009000 0x30020001 0xc4100780 0x20008409 0x00000007 0xd00e0409 0x80c00780
0x2000ca01 0x04200780 0xd00e0025 0x80c00780 0x10054003 0x00000780 0x1000f809 0x0403c780
0x1400d401 0x0423c780 0x30001201 0xec000780 0x20400205 0x04000782 0x3001d205 0xac200780
0x1002a003 0x00000780 0x1000f801 0x0403c780 0x00021005 0xc0000780 0x04055401 0xe4208780
0x308211fd 0x6c4107ca 0x00021005 0xc0000780 0x00000c05 0xc0000780 0xd4117805 0x20000780
0x04001401 0xe4200780 0xa003f003 0x00000000 0x307c0205 0x8c000780 0x3483c1fd 0x6c6147c8
0x1003f003 0x00000280 0x2000d201 0x04220780 0x10000209 0x0403c780 0x10000e09 0x0403c280
0x30000bfd 0x6c00c7c8 0x00020005 0xc0000780 0x300203fd 0x6c00c7c8 0xa008e003 0x00000000
0xa003e003 0x00000000 0x1003d003 0x00000280 0x1000f829 0x0403c780 0x1008d003 0x00000280
0xd0115809 0x20000780 0x2108f214 0x2903e000 0x20000425 0x04000780 0x200a8409 0x00000003
0x20000001 0x04014780 0x30020001 0xc4100780 0x200a920d 0x00000003 0x00070a05 0xc0000780
0x2000ca01 0x04200780 0xd00e000d 0x80c00780 0x0002040d 0xc0000780 0x00020611 0xc0000780
0x1800d401 0x0423c780 0x30000601 0xec000780 0x2000002d 0x04004780 0xd4155009 0x20000780
0x1003e003 0x00000780 0x1000f801 0x0403c780 0x1900e208 0x1900e630 0x1900e00c 0x1900e400
0x04001401 0xe4200782 0xf0000001 0xe0000002 0x4d42e234 0x4d4ce630 0x1800ca09 0x0423c780
0x861ffe03 0x00000000 0x00000409 0xc0000780 0x6c03c00d 0x80234780 0x6c00c431 0x80230780
0x00021005 0xc0000780 0x04025401 0xe43f0780 0x1900e800 0x4d42ea34 0x1900ee08 0x200c860c
0xd8115005 0x20000780 0x3407c1fd 0x6420c7c8 0x6c00c831 0x80234780 0x1900ec00 0x4d42ee08
0xa0053003 0x00000000 0x10052003 0x00000280 0x20000631 0x04030780 0x6c00cc35 0x80208780
0x10004401 0x0023c780 0x60004e01 0x00218780 0x1900f208 0x1900f00c 0x1900f638 0x1900f400
0x30070009 0xc4100780 0x30060001 0xc4100780 0x00000c09 0xc0000780 0xd8116009 0x20000780
0x20000409 0x04000780 0x30020e01 0xc4100780 0x200d9830 0x1900e034 0x4c02d209 0x00218780
0x2102ec08 0x20028000 0x20008001 0x00000007 0x6c03d009 0x80208780 0x4d4ef60c 0x20029808
0xd00e0001 0x80c00780 0x10053003 0x00000780 0x6c00d401 0x8020c780 0x20000401 0x04000780
0x1000f801 0x0403c780 0x00021005 0xc0000782 0x300d0001 0xec000780 0x2040c001 0x04200784
0x04055401 0xe4200780 0xd8117805 0x20000780 0x301f0009 0xec100780 0x30010001 0xc4100780
0x3483c1fd 0x6c6087c8 0xd8115005 0x20000500 0xd0000401 0x04008780 0x20209225 0x00000003
0x3400c001 0x04204500 0x3000d201 0xac200500 0x30840001 0xac400780 0x300b13fd 0x6c0047c8
0x307c0015 0x8c000500 0x1000f815 0x0403c280 0x20001429 0x04000780 0xdc01000d 0x20000780
0x30070bfd 0x6c00c7c8 0xa008f003 0x00000000
0x1000f825 0x0403c780 0x1008d003 0x00000280
0xd811500d 0x20000780 0x2c00c029 0x0421c780
0x200a8e01 0x00000003 0x200a9405 0x00000003
0x00070c05 0xc0000780 0x04000039 0x40000780
0x2c00c035 0x04214780 0x0002000d 0xc0000780
0x00020211 0xc0000780 0x00001c05 0xc0000780
0xd4155005 0x20000780 0x1500e204 0x1500e60c
0x1500e008 0x1500e400 0x4d41e22c 0x4d43e60c
0x1400ca05 0x0423c780 0x6c02c009 0x8022c780
0x6c00c40d 0x8020c780 0x1500e800 0x4d41ea2c
0x1500ee04 0x20038408 0x6c00c80d 0x8022c780
0x1500ec00 0x4d41ee04 0x20000431 0x0400c780
0x6c00cc2d 0x80204780 0x1500f20c 0x1500f008
0x1500f604 0x1500f400 0xd8116005 0x20000780
0x200b982c 0x1500e030 0x4c03d20d 0x00218780
0x6c02d009 0x8020c780 0x4d41f604 0x20029608
0x6c00d401 0x80204780 0x20000401 0x04000780
0x300c0001 0xec000780 0x2040c001 0x04200784
0x301f0005 0xec100780 0x30010001 0xc4100780
0xd0000201 0x04008780 0x20209429 0x00000003
0x30840001 0xac400780 0x300d15fd 0x6c0047c8
0x20001225 0x04000780 0xdc01000d 0x20000780
0xd0010011 0x20000784 0x10069003 0x00000280 0xd0010011 0x20000784 0x10069003 0x00000280
0x1008f003 0x00000780 0x00070c05 0xc0000780 0x1008e003 0x00000780 0x00070a05 0xc0000780
0x04000039 0x40000780 0x0002100d 0xc0000782 0x00021009 0xc0000782 0x08025401 0xe4228780
0x0c025401 0xe4224780 0xdc095005 0x20000780 0xd809500d 0x20000780 0x1c00d001 0x0423c780
0x1400d001 0x0423c780 0x1400f005 0x0423c780 0x1c00f009 0x0423c780 0x2c00c001 0x04200780
0x2400c001 0x04200780 0x2400e005 0x04204780 0x2c00e009 0x04208780 0x20000001 0x04008780
0x20000001 0x04004780 0x0c025401 0xe4200780 0x08025401 0xe4200780 0x1d00e400 0x1d00ec08
0x1500e400 0x1500ec04 0x2500e000 0x2501e804 0x2d00e000 0x2d02e808 0x20000001 0x04008780
0x20000001 0x04004780 0x0c025401 0xe4200780 0x08025401 0xe4200780 0x1d00e200 0x2d00e000
0x1500e200 0x2500e000 0x0c025401 0xe4200780 0x08025401 0xe4200780 0x00000c09 0xc0000780
0xd8117805 0x20000780 0x347cc1fd 0x6c2147c8 0xd8117809 0x20000780 0x387cc1fd 0x6c2147c8
0xa00a9003 0x00000000 0x100a6003 0x00000280 0xa00a8003 0x00000000 0x100a5003 0x00000280
0x00001c05 0xc0000780 0xd4095005 0x20000780 0xd4095009 0x20000780 0x387cc1fd 0x6c2087c8
0x347cc1fd 0x6c2087c8 0x100a8003 0x00000280 0x100a7003 0x00000280 0x10018001 0x00000003
0x10018001 0x00000003 0x100a9003 0x00000780 0x100a8003 0x00000780 0x1000f801 0x0403c780
0x1000f801 0x0403c780 0xf0000001 0xe0000002 0x30850e09 0x64410782 0xd4095005 0x20000780
0x20018e05 0x00000003 0x40031409 0x00000780 0xa000040d 0x2c014780 0x30010219 0xec100780
0x60021609 0x00008780 0x30850e25 0x64410780 0x20018e09 0x00000003 0x407f860d 0x0007ffff
0x00001c05 0xc0000780 0xd4095005 0x20000780 0x2440c019 0x04218780 0x60020205 0x8000c780
0x3010040d 0xc4100780 0xa0001225 0x2c014780 0x30070c09 0xec000780 0x20000205 0x04008780
0x30010a09 0xec100780 0x6002140d 0x0000c780
0x407f9215 0x0007ffff 0x2542e004 0x20058608
0x30070205 0xec000780 0x20000205 0x04008780
0x40030009 0x00000780 0x60020209 0x00008780 0x40030009 0x00000780 0x60020209 0x00008780
0x30100409 0xc4100780 0x60020001 0x00008780 0x30100409 0xc4100780 0x60020001 0x00008780
0x00021009 0xc0000780 0x08025401 0xe4200780 0x00021009 0xc0000780 0x08025401 0xe4200780
@@ -576,8 +753,8 @@ code {
0x3400c001 0xac200780 0x3401d005 0xac200780 0x3400c001 0xac200780 0x3401d005 0xac200780
0x30010001 0xac000780 0x307c0ffd 0x640147c8 0x30010001 0xac000780 0x307c0ffd 0x640147c8
0x08025401 0xe4200780 0x30000003 0x00000280 0x08025401 0xe4200780 0x30000003 0x00000280
0x10004401 0x0023c780 0x60004e01 0x00218780 0x10004401 0x0023c780 0x60004e01 0x00214780
0x00021005 0xc0000780 0xd4095005 0x20000780 0x00021009 0xc0000780 0xd8095005 0x20000780
0x30060009 0xc4100780 0x1500e200 0x1500e604 0x30060009 0xc4100780 0x1500e200 0x1500e604
0x20000809 0x04008780 0x3400c00d 0xac200780 0x20000809 0x04008780 0x3400c00d 0xac200780
0x3401c405 0xac200780 0x30020401 0xc4100780 0x3401c405 0xac200780 0x30020401 0xc4100780
@@ -1546,166 +1723,228 @@ code {
code { code {
name = cudaComputeLPC name = cudaComputeLPC
lmem = 0 lmem = 0
smem = 1256 smem = 4140
reg = 12 reg = 10
bar = 1 bar = 1
const { const {
segname = const segname = const
segnum = 1 segnum = 1
offset = 0 offset = 0
bytes = 56 bytes = 64
mem { mem {
0x0000000f 0x0000001f 0x0000003f 0x00000040 0x000003ff 0x0000000f 0x7e800000 0x3c23d70a
0x00000001 0x00000020 0x7e800000 0x00000900 0x3f317218 0x00000001 0xffffffff 0x0000001f
0x00000480 0x00000240 0x0000000c 0x00000003 0x00000900 0x0000000c 0x00000480 0x00000240
0x3e800000 0x0000009e 0x00000003 0x3e800000 0x00000020 0x0000009e
} }
} }
bincode { bincode {
0xa000000d 0x04000780 0x308007fd 0x644107c8 0xd0800205 0x00400780 0xa0000205 0x04000780
0xa0016003 0x00000000 0x10016003 0x00000280 0xa0000011 0x04000780 0x30050201 0xc4100780
0x1000ca05 0x0423c780 0x1000ce01 0x0423c780 0x20000815 0x04000780 0x30810bfd 0x644107c8
0x41032e08 0x41012c10 0x30100409 0xc4100780 0xa0015003 0x00000000 0x00070205 0xc0000780
0x30100811 0xc4100780 0x60024e05 0x00208780 0x10015003 0x00000280 0x1000ca01 0x0423c780
0x60004c09 0x00210780 0x30070201 0xc4100780 0x40014e09 0x00200780 0x30100409 0xc4100780
0x30060205 0xc4100780 0x30070411 0xc4100780 0x60004e01 0x00208780 0x30070009 0xc4100780
0x30060409 0xc4100780 0x20000005 0x04004780 0x30060001 0xc4100780 0x20000409 0x04000780
0x30020601 0xc4100780 0x20028808 0x20018000 0x30020a01 0xc4100780 0x2102e808 0x20028000
0x2102e804 0x20018000 0xd00e0001 0x80c00780 0xd00e0001 0x80c00780 0x00020a09 0xc0000780
0x00020605 0xc0000780 0x04001201 0xe4200780 0x08001401 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x3001cffd 0x6c2047c8
0xa004c003 0x00000000 0x10000209 0x0403c780
0x1004c003 0x00000280 0x200a8a01 0x00000003
0x00020009 0xc0000780 0x3004d3fd 0x6c2107d8
0x307c09fd 0x640087c8 0xa003b003 0x00000000
0x08002001 0xe43f0780 0x1003b003 0x00001100
0xa0004c0d 0x04200780 0x10004e01 0x0023c780
0x6000480d 0x0020c780 0x1000d201 0x0423c780
0x40070019 0x00000780 0x60060219 0x00018780
0x30100c19 0xc4100780 0x60060019 0x00018780
0x2101ee0d 0x00000003 0x20000c01 0x04010780
0x40010c1d 0x00000780 0x60000e1d 0x0001c780
0x30100e1d 0xc4100780 0x60000c1d 0x0001c780
0x20000e1d 0x04008780 0x30020e1d 0xc4100780
0x3007060d 0xc4100780 0x2106f218 0x2107ec1c
0xd00e0e21 0x80c00780 0x20208001 0x00000003
0xb800e021 0x00220780 0x300601fd 0x6c0047e8
0x08002001 0xe4220780 0x20000e1d 0x0400c780
0x10034003 0x00002280 0x1800f001 0x0423c782
0xd801000d 0x20000780 0xb800e001 0x00200780
0xbd006000 0xbd007000 0x08002001 0xe4200780
0x1800e401 0x0423c780 0xb800e001 0x00200780
0xb800e801 0x00200780 0xb800ec01 0x00200780
0x08002001 0xe4200780 0x1800e201 0x0423c680
0x0002040d 0xc0000680 0xb800e001 0x00200680
0x0c06d401 0xe4200680 0x20088409 0x00000003
0x3002cffd 0x6c2187e8 0x1001f003 0x00002280
0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x307ccffd 0x6c2047c8 0x10062003 0x00000280 0x307c03fd 0x640147c8 0xa00f1003 0x00000000
0xa0004211 0x04200780 0x30820601 0x6c40c780 0x100f1003 0x00000280 0x00020a09 0xc0000780
0x30830805 0x64410780 0xd0840001 0x04400780 0xd81b580d 0x20000780 0x1d00e008 0x1d00e000
0xd0840205 0x04400780 0xd0010015 0x04000780 0x08069401 0xe4208780 0xd01b500d 0x20000780
0x308107fd 0x6c40c7c8 0x3003d1fd 0x6c2107d8 0x08065401 0xe43f0780 0x307ccffd 0x6c20c7c8
0x00020605 0xc0000780 0x1000f819 0x0403c780 0x1c00c00d 0x0423c780 0x1008a003 0x00000280
0x2101ee05 0x00000003 0xa003a003 0x00000000 0x10288009 0x00000003 0x0000040d 0xc0000780
0x10039003 0x00001100 0xa0004c09 0x04200780 0x213fee1d 0x0fffffff 0x1000f809 0x0403c780
0x10004e01 0x0023c780 0x60004809 0x00208780 0xd01a5011 0x20000780 0xb08207fd 0x605107c8
0x1000d001 0x0423c780 0x4005001d 0x00000780 0x10000619 0x0403c780 0xa000c021 0xe4204784
0x6004021d 0x0001c780 0x30100e1d 0xc4100780 0xc08d1021 0x00400680 0xc08d0c19 0x00400680
0x60040001 0x0001c780 0x20000001 0x0400c780 0x90000c24 0x20428e18 0xc0091021 0x00000780
0x40010409 0x00000780 0x60000609 0x00008780 0xd01a5011 0x20000780 0x30060bfd 0x6c0187c8
0x30100409 0xc4100780 0x60000401 0x00008780 0xe008c00d 0x0020c784 0xd81a5811 0x20000500
0x20000001 0x04018780 0x30020001 0xc4100780 0x1000c019 0x0423c504 0xe0001019 0x00018500
0x2000cc01 0x04200780 0xd00e0001 0x80c00780 0xe008c001 0x00200504 0x08069401 0xe4218500
0x1003a003 0x00000780 0x1000f801 0x0403c780 0x20400419 0x04014780 0x00020c11 0xc0000780
0x04017401 0xe4200782 0x861ffe03 0x00000000 0xd0194811 0x20000784 0x40050825 0x00000780
0x307c0bfd 0x6c0087e8 0xd407d00d 0x20002500 0x300505fd 0x6c00c7c8 0xc008c019 0x00200784
0xd405d009 0x20002500 0x1c00c001 0x0423e500 0x60040a25 0x00024780 0x1000f819 0x0403c280
0xb800c001 0x00202500 0x04017401 0xe4202500 0x30101225 0xc4100780 0x300505fd 0x6c0147c8
0x861ffe03 0x00000000 0xa005d003 0x00000000 0xb0000c21 0x00020780 0x60040825 0x00024780
0x1005d003 0x00000100 0x308509fd 0x6440c7e8 0x10000c21 0x0403c280 0xd8195011 0x20000780
0x1004b003 0x00002280 0xd406d00d 0x20000780 0x20000425 0x04024780 0xb000c019 0x00220784
0xd405d009 0x20000780 0x1d00e000 0xb9006000 0x30011221 0xec100780 0x08065401 0xe4218780
0x04017401 0xe4200780 0xd405d009 0x20000780 0x20000a19 0x04020780 0xa000c021 0xe4204784
0x1800e001 0x0423c780 0xb800c001 0x00200780 0x00020c11 0xc0000780 0x00023401 0xe4220784
0x04017401 0xe4200780 0x1900f000 0xb9006000 0x20018409 0x00000003 0x0c070201 0xe420c780
0x04017401 0xe4200780 0x1900e800 0xb9006000 0x3002cffd 0x6c2147c8 0xdc00080d 0x20000780
0x04017401 0xe4200780 0x1900e400 0xb9006000 0x1005e003 0x00000280 0x1008b003 0x00000780
0x04017401 0xe4200780 0x1900e200 0xb9006000 0x213fee1d 0x0fffffff 0xa0000a09 0x44014780
0x307c07fd 0x6c0147e8 0x04017401 0xe4200780 0x3005cffd 0x6c20c7c8 0x10000401 0x0403c780
0x1005d003 0x00002280 0xd005d00d 0x20000780 0xa0000e01 0x44014280 0x1000ce0d 0x0423c780
0x00020c09 0xc0000780 0x1c00c001 0x0423c780 0x08079601 0xe4200780 0x3003d1fd 0x6c2187c8
0x0800b201 0xe4200780 0xf0000001 0xe0000002 0x0807d601 0xe43f0780 0x100f1003 0x00000280
0x20018c19 0x00000003 0x300603fd 0x6c0147e8 0x3005cffd 0x6c20c7c8 0xa00a2003 0x00000000
0x10025003 0x00002280 0x10063003 0x00000780 0x1009d003 0x00000280 0xd81c580d 0x20000780
0x308107fd 0x6c40c7c8 0x30000003 0x00000100 0x1c00c001 0x0423c780 0x90000001 0x60000780
0x00020605 0xc0000780 0xd402d009 0x20000780 0xc0180001 0x03f31723 0xe00a0401 0x03c23d73
0x1800c001 0x0423c780 0x0400f401 0xe4200780 0x100a2003 0x00000780 0xd01c580d 0x20000780
0x1800c001 0x0423c780 0x04013401 0xe4200780 0x1c00c009 0x0423c780 0x10008001 0x03f80003
0xd002c809 0x20000780 0x04003201 0xe43f0780 0x90000409 0x60000780 0xe0180401 0x03f31723
0x307ccffd 0x6c20c7c8 0x30020611 0xc4100780 0x08071601 0xe4200782 0xd01c580d 0x20000780
0x1800c009 0x0423c780 0x30000003 0x00000280 0x1c00c009 0x0423c780 0x10008001 0x03f80003
0x307c07fd 0x6c0087c8 0x213fee15 0x0fffffff 0x90000409 0x60000780 0xe0180401 0x03f31723
0x1000f819 0x0403c780 0xd004d005 0x20000780 0x08075601 0xe4200780 0x30010819 0xc4100780
0xb08605fd 0x605107d8 0x10000401 0x0403c780 0x1002801d 0x00000003 0x301f0e01 0xec100780
0xa400c005 0xe4204780 0xc08c0205 0x00401680 0xd0850001 0x04400780 0x20000001 0x0401c780
0xc08c0001 0x00401680 0x90000000 0xc0000200 0x30010001 0xec100780 0x307c01fd 0x6c00c7c8
0xd004d005 0x20000780 0x20400a1d 0x04018780 0x100d2003 0x00000280 0xd0040009 0x04000780
0xc400c005 0x0020c780 0x300707fd 0x6c0187d8 0x307c05fd 0x640087c8 0x203f8009 0x0fffffff
0xb0000409 0x00004780 0xa0089003 0x00000000 0xd0040409 0x04000780 0x20428c08 0x2000840c
0x10089003 0x00001280 0x00000805 0xc0000780 0x0002060d 0xc0000780 0xdc1c5811 0x20000780
0xd403d00d 0x20000780 0xd404d809 0x20000780 0x00020409 0xc0000780 0x1000c009 0x0423c784
0xcc00c005 0x0020c780 0xc800c01d 0x0020c780 0xd81c5811 0x20000780 0xb002c00d 0x60218784
0xb9016004 0xbd07601c 0x04013401 0xe4204780 0x10000011 0x00000003 0xa0000615 0x08014780
0x0400f401 0xe421c780 0x20400c05 0x0400c782 0x10001811 0x20408280 0xd00509fd 0x000087d8
0x00020205 0xc0000780 0x30030dfd 0x6c00c7d8 0xa00cf003 0x00000000 0x100cf003 0x00001280
0xc400f005 0x0020c780 0x1000f805 0x0403d280 0xdc1c5811 0x20000780 0x1000c00d 0x0423c784
0x30030dfd 0x6c0147d8 0xb0000201 0x00000780 0xd81c5811 0x20000780 0x1000c009 0x0423c784
0x10000201 0x0403d280 0x00000809 0xc0000780 0x08071601 0xe420c780 0x0c071601 0xe4208780
0xb800f201 0x00200780 0x307c0dfd 0x6c0087d8 0xd81e5811 0x20000780 0xa000c00d 0x8c264784
0x08003201 0xe4200780 0xa0000c01 0x44065500 0xdc1e5811 0x20000780 0x1000c009 0x0423c784
0x30170001 0xec101500 0x31000001 0x04435500 0x08079601 0xe4208780 0xa0000609 0x44014780
0x10000a01 0x2440d280 0x3087e1fd 0x6c60c7d8 0x0c079601 0xe4208780 0x30010001 0xec100782
0x3088e01d 0x6c60c780 0x100d8005 0x00000003 0x307c01fd 0x6c0107d8 0x100b3003 0x00001280
0xd0840e21 0x04400780 0x10001405 0x2440d280 0x30010e1d 0xc4100780 0x30870ffd 0x6c40c7c8
0x3089e01d 0x6c60c780 0x20400205 0x04020780 0x100ab003 0x00000280 0x1010800d 0x00000003
0xd0840e1d 0x04400780 0x00000809 0xc0000780 0x203f8601 0x0fffffff 0xd0040001 0x04000780
0x3000ea01 0x04200780 0x20400205 0x0401c780 0x20408c00 0x20038008 0x0002040d 0xc0000780
0xa800f21d 0xc4304780 0x20018001 0x00000003 0xdc1c5811 0x20000780 0x00020009 0xc0000780
0x3001ea05 0xac200780 0xc0000e1d 0x04700003 0x1000c001 0x0423c784 0xd81c5811 0x20000780
0x30010005 0xac000780 0xa0000e01 0x8c0047d0 0xb000c1fd 0x602187cc 0xa00ee003 0x00000000
0x308b0205 0x8c400780 0xa0000001 0x44065680 0x100ee003 0x00000100 0xdc1c5811 0x20000780
0x30170001 0xec101680 0x31000001 0x04435680 0x1000c009 0x0423c784 0xd81c5811 0x20000780
0x10000a01 0x2440d100 0x2000001d 0x04004780 0x1000c001 0x0423c784 0x08071601 0xe4208780
0x30030c01 0x6c0187d0 0x30218e21 0x00000003 0x0c071601 0xe4200780 0xd81e5811 0x20000780
0xd084001d 0x04400780 0x00000809 0xc0000780 0xa000c009 0x8c264784 0xdc1e5811 0x20000780
0x40080e01 0x00018780 0x08007201 0xe4200780 0x1000c001 0x0423c784 0x08079601 0xe4200780
0xd801c80d 0x20000780 0x1c00e001 0x0423c780 0xa0000401 0x44014780 0x0c079601 0xe4200780
0x3c00c001 0x8c200780 0x08007201 0xe4200780 0x3001060d 0xec100782 0x307c07fd 0x6c0107c8
0x1c00d001 0x0423c780 0x3c00c001 0x8c200780 0x100d6003 0x00000280 0xf0000001 0xe0000002
0x08007201 0xe4200780 0x1c00c801 0x0423c780 0x861ffe03 0x00000000 0x3001d1fd 0x6c20c7c8
0x3c00c001 0x8c200780 0x08007201 0xe4200780 0x10000201 0x0403c780 0x30000003 0x00000280
0x1c00c401 0x0423c780 0x3c00c001 0x8c200780 0x200a8a09 0x00000003 0x200a8205 0x00000003
0x08007201 0xe4200780 0x1c00c201 0x0423c780 0x0002040d 0xc0000780 0x00020209 0xc0000780
0x3c00c001 0x8c200780 0x08007201 0xe4200780 0x307c09fd 0x640087c8 0xd81e0811 0x20000780
0xd001c809 0x20000780 0x390fe001 0x00000003 0xa000c00d 0x8c264784 0x30040605 0x640187e0
0x30800021 0xac400780 0x10018001 0x00000003 0xa00003fd 0x0c0147d8 0xa010d003 0x00000000
0x307c1021 0x8c000780 0x30080025 0xc4000780 0x1010c003 0x00002100 0x20018605 0x00000003
0xa0001225 0x44014780 0x30010001 0xc4000780 0x40030c09 0x00000780 0x60020e09 0x00008780
0xc409f225 0x00200780 0x203f8005 0x0fffffff 0x30100409 0xc4100780 0x60020c05 0x00008780
0xa0001225 0xac004780 0x30090205 0xac000780 0x30010205 0xec100780 0x20018604 0x20448204
0x30008001 0x00000003 0x30010025 0x8c000780 0x00020211 0xc0000780 0xd008d011 0x20000784
0xa00e3003 0x00000000 0x100e3003 0x00001100 0x1000c005 0x0423c784 0x1010d003 0x00000780
0x1100ee04 0x1100ea00 0x41032c28 0x41012e2c 0x1000f805 0x0403c780 0xdc010011 0x20000782
0x30101429 0xc4100780 0x3010162d 0xc4100780 0xc0000209 0x04700003 0xa0000409 0x8c004780
0x60024c05 0x00228780 0x60004e01 0x0022c780 0x301f0415 0xec100780 0xd0050409 0x04008780
0x20008200 0x20008c00 0x30070005 0xc4100780 0x0c002001 0xe4208780 0x1000d015 0x0423c784
0x30060001 0xc4100780 0x20008200 0x2100e800 0x1c00f009 0x0423c780 0xd005c015 0x04204784
0x20000801 0x04000780 0x20008001 0x00000007 0xdc02e009 0x04204780 0xd0050409 0x04004780
0xd00e0025 0xa0c00780 0xf0000001 0xe0000002 0x0c002001 0xe4208780 0x1c00e409 0x0423c780
0xa00f2003 0x00000000 0x100f2003 0x00000100 0x1c00ec15 0x0423c780 0xdc02e009 0x04204780
0x1100ee04 0x1100ea00 0x41032c28 0x41012e2c 0xdc05e815 0x04204780 0xd0050409 0x04004780
0x30101429 0xc4100780 0x3010162d 0xc4100780 0x307c07fd 0x6c0087e8 0x0c002001 0xe4208780
0x60024c05 0x00228780 0x60004e01 0x0022c780 0xa0000609 0x44066500 0x30170409 0xec102500
0x20008200 0x20008c00 0x30070005 0xc4100780 0x31000415 0x0443e500 0x10001c15 0x2440e280
0x30060001 0xc4100780 0x20008200 0x2100e800 0x1400f609 0x0423c780 0xd402f409 0x042047e0
0x20088001 0x00000003 0xd00e0021 0xa0c00780 0xa0000409 0x44066680 0x30170409 0xec102680
0x301f1201 0xec100782 0xd0001201 0x040087d0 0x31000409 0x0443e680 0x10001c09 0x2440e100
0xa0000001 0x44065680 0x30170001 0xec101680 0x3088e3fd 0x6c60c7e8 0x308ae21d 0x6c60c780
0x31000001 0x04435680 0x10000a01 0x2440d100 0x100d8019 0x00000003 0xd0850e1d 0x04400780
0x30218001 0x00000003 0x40070001 0x00018780 0x10001219 0x2440e280 0x308be221 0x6c60c780
0x00000805 0xc0000780 0x04007201 0xe4200780 0x20400c19 0x0401c780 0xd085101d 0x04400780
0xd401c809 0x20000780 0x1800e001 0x0423c780 0x3000ec15 0x04214780 0x20400c19 0x0401c780
0x3800c001 0x8c200780 0x04007201 0xe4200780 0x20018a15 0x00000003 0x3006ec19 0xac200780
0x1800d001 0x0423c780 0x3800c001 0x8c200780 0x30060a15 0xac000780 0x308c0a15 0x8c400780
0x04007201 0xe4200780 0x1800c801 0x0423c780 0x20000409 0x04014780 0x202e8409 0x0fffffff
0x3800c001 0x8c200780 0x04007201 0xe4200780 0x30810419 0xac400780 0x10018009 0x00000003
0x1800c401 0x0423c780 0x3800c001 0x8c200780 0x307c0c19 0x8c000780 0x3006041d 0xc4000780
0x04007201 0xe4200780 0x1800c201 0x0423c780 0x203f8a21 0x0fffffff 0xa0000e15 0x44014780
0x3800c001 0x8c200780 0x04007201 0xe4200780 0x30080409 0xc4000780 0xc0010a05 0x00000780
0xa011c003 0x00000000 0x1011c003 0x00000100 0x203f8415 0x0fffffff 0xa0000205 0x8c004780
0x1100ee04 0x1100ea00 0x41032c1c 0x41012e20 0x30050205 0xac000780 0x30008409 0x00000003
0x30100e1d 0xc4100780 0x30101021 0xc4100780 0x30020215 0x8c000780 0x301f0a05 0xec100780
0x60024c05 0x0021c780 0x60004e01 0x00220780 0xd0010a05 0x04008780 0x0c002001 0xe4204780
0x20008200 0x20008c00 0x30070005 0xc4100780 0xdc010011 0x20000780 0x1000d009 0x0423c784
0x30060001 0xc4100780 0x20000201 0x04000780 0x1c00f005 0x0423c780 0xd002c009 0x04204784
0xd001c805 0x20000780 0x2100e804 0x1500e000 0xdc01e005 0x04204780 0xd0020205 0x04004780
0x200c8205 0x00000003 0xd00e0201 0xa0c00780 0x0c002001 0xe4204780 0x1c00e405 0x0423c780
0xf0000001 0xe0000002 0x20018c19 0x00000003 0x1c00ec09 0x0423c780 0xdc01e005 0x04204780
0x3006cffd 0x6c2147d8 0x10073003 0x00001280 0xdc02e809 0x04204780 0xd0020205 0x04004780
0x0c002001 0xe4204780 0x1400f605 0x0423c780
0xd401f405 0x042047e0 0xa0000205 0x44066680
0x30170205 0xec102680 0x3100021d 0x0443e680
0x10001c1d 0x2440e100 0xa0187003 0x00000000
0x10187003 0x00000100 0x1100f008 0x1100ea04
0x41052c20 0x41032e24 0x30101021 0xc4100780
0x30101225 0xc4100780 0x60044c09 0x00220780
0x60024e05 0x00224780 0x20018404 0x20018004
0x30070209 0xc4100780 0x30060205 0xc4100780
0x20018404 0x2101e804 0x20088205 0x00000003
0xd00e0219 0xa0c00780 0x10187003 0x00000100
0x1100f008 0x1100ea04 0x41052c18 0x41032e20
0x30100c19 0xc4100780 0x30101021 0xc4100780
0x60044c09 0x00218780 0x60024e05 0x00220780
0x20018404 0x20018004 0x30070209 0xc4100780
0x30060205 0xc4100780 0x20018404 0x2101e808
0x30218e05 0x00000003 0x200c8409 0x00000003
0xd00e0405 0xa0c00780 0x10187003 0x00000100
0x1100f008 0x1100ea04 0x41052c18 0x41032e1c
0x30100c19 0xc4100780 0x30100e1d 0xc4100780
0x60044c09 0x00218780 0x60024e05 0x0021c780
0x20018404 0x20018004 0x30070209 0xc4100780
0x30060205 0xc4100780 0x20000409 0x04004780
0x20018605 0x00000003 0x2000c809 0x04208780
0xd00e0405 0xa0c00780 0xf0000001 0xe0000002
0xa0198003 0x00000000 0x10198003 0x00001100
0x1100f008 0x1100ea04 0x41052c0c 0x41032e18
0x3010060d 0xc4100780 0x30100c19 0xc4100780
0x60044c09 0x0020c780 0x60024e05 0x00218780
0x20018404 0x20018004 0x30070209 0xc4100780
0x30060205 0xc4100780 0x20000405 0x04004780
0x30020809 0xc4100780 0x2101e804 0x20018404
0x20008205 0x00000007 0xd00e0215 0xa0c00780
0xd8004009 0x20000782 0x20088001 0x00000003
0x3000d1fd 0x6c2107d8 0x100fb003 0x00001280
0xf0000001 0xe0000001 0xf0000001 0xe0000001
} }
} }