experiment with Burg LPC method

This commit is contained in:
chudov
2009-09-28 00:20:46 +00:00
parent bbf6d2c328
commit 3c287e2ede
3 changed files with 781 additions and 133 deletions

View File

@@ -221,6 +221,8 @@ namespace CUETools.Codecs.FlaCuda
cuda.Free(cudaWindow);
task1.Dispose();
task2.Dispose();
cuda.UnloadModule();
cuda.DestroyContext();
cuda.Dispose();
inited = false;
}
@@ -241,6 +243,8 @@ namespace CUETools.Codecs.FlaCuda
cuda.Free(cudaWindow);
task1.Dispose();
task2.Dispose();
cuda.UnloadModule();
cuda.DestroyContext();
cuda.Dispose();
inited = false;
}
@@ -1009,7 +1013,7 @@ namespace CUETools.Codecs.FlaCuda
int index = ch + iFrame * channels;
if (task.BestResidualTasks[index].size < 0)
throw new Exception("internal error");
if (frame.blocksize > 4 && frame.subframes[ch].best.size > task.BestResidualTasks[index].size)
if (frame.blocksize > Math.Max(4, eparams.max_prediction_order) && frame.subframes[ch].best.size > task.BestResidualTasks[index].size)
{
frame.subframes[ch].best.type = (SubframeType)task.BestResidualTasks[index].type;
frame.subframes[ch].best.size = (uint)task.BestResidualTasks[index].size;
@@ -1094,6 +1098,14 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetParameterSize(task.cudaComputeLPC, (uint)(sizeof(uint) * 3) + sizeof(uint) * 2);
cuda.SetFunctionBlockShape(task.cudaComputeLPC, (autocorPartCount + 31) & ~31, 1, 1);
cuda.SetParameter(task.cudaComputeLPCLattice, 0, (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaComputeLPCLattice, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
cuda.SetParameter(task.cudaComputeLPCLattice, 2 * sizeof(uint), (uint)task.cudaSamples.Pointer);
cuda.SetParameter(task.cudaComputeLPCLattice, 3 * sizeof(uint), (uint)task.frameSize);
cuda.SetParameter(task.cudaComputeLPCLattice, 4 * sizeof(uint), (uint)eparams.max_prediction_order);
cuda.SetParameterSize(task.cudaComputeLPCLattice, 5U * sizeof(uint));
cuda.SetFunctionBlockShape(task.cudaComputeLPCLattice, 256, 1, 1);
cuda.SetParameter(task.cudaEstimateResidual, sizeof(uint) * 0, (uint)task.cudaResidualOutput.Pointer);
cuda.SetParameter(task.cudaEstimateResidual, sizeof(uint) * 1, (uint)task.cudaSamples.Pointer);
cuda.SetParameter(task.cudaEstimateResidual, sizeof(uint) * 2, (uint)task.cudaResidualTasks.Pointer);
@@ -1130,10 +1142,15 @@ namespace CUETools.Codecs.FlaCuda
// issue work to the GPU
cuda.LaunchAsync(cudaChannelDecorr, (task.frameCount * task.frameSize + 255) / 256, channels == 2 ? 1 : channels, task.stream);
if (task.frameSize <= 512 && _windowcount == 1)
cuda.LaunchAsync(task.cudaComputeLPCLattice, 1, channelsCount * task.frameCount, task.stream);
else
{
if (eparams.do_wasted)
cuda.LaunchAsync(task.cudaFindWastedBits, channelsCount * task.frameCount, 1, task.stream);
cuda.LaunchAsync(task.cudaComputeAutocor, autocorPartCount, task.nAutocorTasksPerChannel * channelsCount * task.frameCount, task.stream);
cuda.LaunchAsync(task.cudaComputeLPC, 1, task.nAutocorTasksPerChannel * channelsCount * task.frameCount, task.stream);
}
cuda.LaunchAsync(task.cudaEstimateResidual, residualPartCount, task.nResidualTasksPerChannel * channelsCount * task.frameCount / threads_y, task.stream);
cuda.LaunchAsync(task.cudaChooseBestMethod, 1, channelsCount * task.frameCount, task.stream);
if (channels == 2 && channelsCount == 4)
@@ -1205,9 +1222,9 @@ namespace CUETools.Codecs.FlaCuda
_windowsize = task.frameSize;
_windowcount = 0;
calculate_window(window, lpc.window_welch, WindowFunction.Welch);
calculate_window(window, lpc.window_flattop, WindowFunction.Flattop);
calculate_window(window, lpc.window_tukey, WindowFunction.Tukey);
calculate_window(window, lpc.window_hann, WindowFunction.Hann);
calculate_window(window, lpc.window_flattop, WindowFunction.Flattop);
calculate_window(window, lpc.window_bartlett, WindowFunction.Bartlett);
if (_windowcount == 0)
throw new Exception("invalid windowfunction");
@@ -1827,6 +1844,7 @@ namespace CUETools.Codecs.FlaCuda
public CUfunction cudaFindWastedBits;
public CUfunction cudaComputeAutocor;
public CUfunction cudaComputeLPC;
public CUfunction cudaComputeLPCLattice;
public CUfunction cudaEstimateResidual;
public CUfunction cudaChooseBestMethod;
public CUfunction cudaCopyBestMethod;
@@ -1907,6 +1925,7 @@ namespace CUETools.Codecs.FlaCuda
cudaChannelDecorr2 = cuda.GetModuleFunction("cudaChannelDecorr2");
cudaFindWastedBits = cuda.GetModuleFunction("cudaFindWastedBits");
cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC");
cudaComputeLPCLattice = cuda.GetModuleFunction("cudaComputeLPCLattice");
cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual");
cudaChooseBestMethod = cuda.GetModuleFunction("cudaChooseBestMethod");
cudaCopyBestMethod = cuda.GetModuleFunction("cudaCopyBestMethod");

View File

@@ -251,7 +251,7 @@ extern "C" __global__ void cudaComputeLPC(
shared.ldr[tid] += (tid < order) * __fmul_rz(reff, shared.ldr[order - 1 - tid]) + (tid == order) * reff;
// Quantization
int precision = 13 - (order > 8);
int precision = 13 - (order > 8) - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576);
int taskNo = shared.task.residualOffs + order;
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.ldr[tid]) * (1 << 15))) - precision), tid <= order);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
@@ -267,7 +267,7 @@ extern "C" __global__ void cudaComputeLPC(
output[taskNo].coefs[tid] = coef;
if (tid == 0)
output[taskNo].shift = sh;
shared.bits[tid] = 33 - max(__clz(coef),__clz(-1 ^ coef));
shared.bits[tid] = __mul24(33 - max(__clz(coef),__clz(-1 ^ coef)), tid <= order);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]);
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 4]);
@@ -280,11 +280,11 @@ extern "C" __global__ void cudaComputeLPC(
}
}
#define SUM32(buf,tid) buf[tid] += buf[tid + 16]; buf[tid] += buf[tid + 8]; buf[tid] += buf[tid + 4]; buf[tid] += buf[tid + 2]; buf[tid] += buf[tid + 1];
#define SUM64(buf,tid) if (tid < 32) buf[tid] += buf[tid + 32]; __syncthreads(); if (tid < 32) SUM32(buf,tid)
#define SUM128(buf,tid) if (tid < 64) buf[tid] += buf[tid + 64]; __syncthreads(); SUM64(buf,tid)
#define SUM256(buf,tid) if (tid < 128) buf[tid] += buf[tid + 128]; __syncthreads(); SUM128(buf,tid)
#define SUM512(buf,tid) if (tid < 256) buf[tid] += buf[tid + 256]; __syncthreads(); SUM256(buf,tid)
#define SUM32(buf,tid,op) buf[tid] op buf[tid + 16]; buf[tid] op buf[tid + 8]; buf[tid] op buf[tid + 4]; buf[tid] op buf[tid + 2]; buf[tid] op buf[tid + 1];
#define SUM64(buf,tid,op) if (tid < 32) buf[tid] op buf[tid + 32]; __syncthreads(); if (tid < 32) SUM32(buf,tid,op)
#define SUM128(buf,tid,op) if (tid < 64) buf[tid] op buf[tid + 64]; __syncthreads(); SUM64(buf,tid,op)
#define SUM256(buf,tid,op) if (tid < 128) buf[tid] op buf[tid + 128]; __syncthreads(); SUM128(buf,tid,op)
#define SUM512(buf,tid,op) if (tid < 256) buf[tid] op buf[tid + 256]; __syncthreads(); SUM256(buf,tid,op)
#define FSQR(s) ((s)*(s))
@@ -298,14 +298,21 @@ extern "C" __global__ void cudaComputeLPCLattice(
{
__shared__ struct {
encodeResidualTaskStruct task;
union {
volatile float F[512];
volatile int tmpi[512];
};
union {
volatile float B[512];
volatile int smp[512];
};
volatile float tmp[256];
volatile float arp[32];
volatile float rc[32];
volatile int bits[32];
int bits[32];
volatile float PE[33];
volatile float DEN, reff;
int actual_bits;
} shared;
// fetch task data
@@ -314,31 +321,48 @@ extern "C" __global__ void cudaComputeLPCLattice(
__syncthreads();
// F = samples; B = samples
shared.F[threadIdx.x] = threadIdx.x < frameSize ? samples[shared.task.samplesOffs + threadIdx.x] >> shared.task.wbits : 0.0f;
shared.F[threadIdx.x + 256] = threadIdx.x + 256 < frameSize ? samples[shared.task.samplesOffs + threadIdx.x + 256] >> shared.task.wbits : 0.0f;
shared.tmpi[threadIdx.x] = shared.smp[threadIdx.x] = threadIdx.x < frameSize ? samples[shared.task.samplesOffs + threadIdx.x] : 0;
shared.tmpi[threadIdx.x + 256] = shared.smp[threadIdx.x + 256] = threadIdx.x + 256 < frameSize ? samples[shared.task.samplesOffs + threadIdx.x + 256] : 0;
__syncthreads();
SUM512(shared.tmpi,threadIdx.x,|=);
if (threadIdx.x == 0)
shared.task.wbits = max(0,__ffs(shared.tmpi[0]) - 1);
__syncthreads();
shared.tmpi[threadIdx.x] = shared.smp[threadIdx.x] ^ (shared.smp[threadIdx.x] >> 31);
shared.tmpi[threadIdx.x + 256] = shared.smp[threadIdx.x + 256] ^ (shared.smp[threadIdx.x + 256] >> 31);
SUM512(shared.tmpi,threadIdx.x,|=);
if (threadIdx.x == 0)
shared.actual_bits = 32 - __clz(shared.tmpi[0]) - shared.task.wbits;
__syncthreads();
shared.F[threadIdx.x] = shared.smp[threadIdx.x] >> shared.task.wbits;
shared.F[threadIdx.x + 256] = shared.smp[threadIdx.x + 256] >> shared.task.wbits;
shared.B[threadIdx.x] = shared.F[threadIdx.x];
shared.B[threadIdx.x + 256] = shared.F[threadIdx.x + 256];
__syncthreads();
// DEN = F*F'
shared.tmp[threadIdx.x] = FSQR(shared.F[threadIdx.x]) + FSQR(shared.F[threadIdx.x + 256]);
__syncthreads();
SUM256(shared.tmp,threadIdx.x);
if (threadIdx.x == 0)
{
shared.DEN = shared.tmp[0];
shared.PE[0] = shared.tmp[0] / frameSize;
}
__syncthreads();
for (int order = 1; order <= max_order; order++)
{
// reff = F(order+1:frameSize) * B(1:frameSize-order)' / DEN
float f1 = (threadIdx.x + order < frameSize) * shared.F[order + threadIdx.x];
float f2 = (threadIdx.x + 256 + order < frameSize) * shared.F[order + threadIdx.x + 256];
float f1 = (threadIdx.x + order < frameSize) * shared.F[threadIdx.x + order];
float f2 = (threadIdx.x + 256 + order < frameSize) * shared.F[threadIdx.x + 256 + order];
// DEN = F(order+1:frameSize) * F(order+1:frameSize)' + B(1:frameSize-order) * B(1:frameSize-order)' (BURG)
shared.tmp[threadIdx.x] = FSQR(f1) + FSQR(f2);
shared.tmp[threadIdx.x] += (threadIdx.x < frameSize - order) * FSQR(shared.B[threadIdx.x])
+ (threadIdx.x + 256 < frameSize - order) * FSQR(shared.B[threadIdx.x + 256]);
__syncthreads();
SUM256(shared.tmp, threadIdx.x, +=);
if (threadIdx.x == 0)
{
shared.DEN = shared.tmp[0] / 2;
shared.PE[order-1] = shared.tmp[0] / 2 / (frameSize - order + 1);
}
__syncthreads();
shared.tmp[threadIdx.x] = f1 * shared.B[threadIdx.x] + f2 * shared.B[threadIdx.x + 256];
__syncthreads();
SUM256(shared.tmp, threadIdx.x);
SUM256(shared.tmp, threadIdx.x, +=);
if (threadIdx.x == 0)
shared.reff = shared.tmp[0] / shared.DEN;
__syncthreads();
@@ -349,8 +373,8 @@ extern "C" __global__ void cudaComputeLPCLattice(
// Levinson-Durbin recursion
// arp(1:order-1) = arp(1:order-1) - reff * arp(order-1:-1:1)
if (threadIdx.x < 32)
shared.arp[threadIdx.x] -= (threadIdx.x < order - 1) * __fmul_rz(shared.reff, shared.arp[order - 2 - threadIdx.x]);
if (threadIdx.x < order - 1)
shared.arp[threadIdx.x] -= shared.reff * shared.arp[order - 2 - threadIdx.x];
// F1 = F(order+1:frameSize) - reff * B(1:frameSize-order)
// B(1:frameSize-order) = B(1:frameSize-order) - reff * F(order+1:frameSize)
@@ -367,48 +391,46 @@ extern "C" __global__ void cudaComputeLPCLattice(
}
__syncthreads();
// DEN = F(order+1:frameSize) * F(order+1:frameSize)' + B(1:frameSize-order) * B(1:frameSize-order)' (BURG)
shared.tmp[threadIdx.x] = (threadIdx.x < frameSize - order) * (FSQR(shared.F[threadIdx.x + order]) + FSQR(shared.B[threadIdx.x]))
+ (threadIdx.x + 256 < frameSize - order) * (FSQR(shared.F[threadIdx.x + 256 + order]) + FSQR(shared.B[threadIdx.x + 256]));
__syncthreads();
SUM256(shared.tmp, threadIdx.x);
if (threadIdx.x == 0)
{
shared.DEN = shared.tmp[0] / 2;
shared.PE[order] = shared.tmp[0] / 2 / (frameSize - order);
}
__syncthreads();
// Quantization
if (threadIdx.x < 32)
{
int precision = 10 - (order > 8) - min(2, shared.task.wbits);
int taskNo = taskCount * blockIdx.y + order - 1;
shared.bits[threadIdx.x] = __mul24((33 - __clz(__float2int_rn(fabs(shared.arp[threadIdx.x]) * (1 << 15))) - precision), threadIdx.x < order);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 16]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 8]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 4]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 2]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 1]);
int sh = max(0,min(15, 15 - shared.bits[0]));
// get 15 bits of each coeff
shared.bits[threadIdx.x] = __mul24(__float2int_rn(shared.arp[threadIdx.x] * (1 << 15)), threadIdx.x < order);
// remove sign bits
shared.bits[threadIdx.x] = shared.bits[threadIdx.x] ^ (shared.bits[threadIdx.x] >> 31);
// OR reduction
SUM32(shared.bits,threadIdx.x,|=);
// choose precision
if (threadIdx.x == 0)
shared.task.cbits = max(3, min(10, shared.actual_bits)); // - __float2int_rn(shared.PE[order - 1])
// calculate shift based on precision and number of leading zeroes in coeffs
if (threadIdx.x == 0)
shared.task.shift = max(0,min(15, __clz(shared.bits[0]) - 18 + shared.task.cbits));
// quantize coeffs with given shift
shared.task.coefs[threadIdx.x] = max(-(1 << (shared.task.cbits - 1)), min((1 << (shared.task.cbits - 1))-1, __float2int_rn(shared.arp[threadIdx.x] * (1 << shared.task.shift))));
// error correction
//shared.tmp[threadIdx.x] = (threadIdx.x != 0) * (shared.arp[threadIdx.x - 1]*(1 << shared.task.shift) - shared.task.coefs[threadIdx.x - 1]);
//shared.task.coefs[threadIdx.x] = max(-(1 << (shared.task.cbits - 1)), min((1 << (shared.task.cbits - 1))-1, __float2int_rn((shared.arp[threadIdx.x]) * (1 << shared.task.shift) + shared.tmp[threadIdx.x])));
// remove sign bits
shared.bits[threadIdx.x] = __mul24(shared.task.coefs[threadIdx.x] ^ (shared.task.coefs[threadIdx.x] >> 31), threadIdx.x < order);
// OR reduction
SUM32(shared.bits,threadIdx.x,|=);
// calculate actual number of bits (+1 for sign)
if (threadIdx.x == 0)
shared.task.cbits = 1 + 32 - __clz(shared.bits[0]);
// reverse coefs
int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(shared.arp[order - 1 - threadIdx.x] * (1 << sh))));
// output shift, cbits and output coeffs in reverse order
int taskNo = taskCount * blockIdx.y + order - 1;
if (threadIdx.x == 0)
tasks[taskNo].shift = shared.task.shift;
if (threadIdx.x == 0)
tasks[taskNo].cbits = shared.task.cbits;
if (threadIdx.x < order)
tasks[taskNo].coefs[threadIdx.x] = coef;
if (threadIdx.x == 0)
tasks[taskNo].shift = sh;
shared.bits[threadIdx.x] = 33 - max(__clz(coef),__clz(-1 ^ coef));
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 16]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 8]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 4]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 2]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 1]);
int cbits = shared.bits[0];
if (threadIdx.x == 0)
tasks[taskNo].cbits = cbits;
tasks[taskNo].coefs[threadIdx.x] = shared.task.coefs[order - 1 - threadIdx.x];
}
}
if (threadIdx.x < taskCount)
tasks[blockIdx.y * taskCount + threadIdx.x].wbits = shared.task.wbits;
}
extern "C" __global__ void cudaComputeLPCLattice512(
@@ -444,7 +466,7 @@ extern "C" __global__ void cudaComputeLPCLattice512(
// DEN = F*F'
shared.tmp[threadIdx.x] = FSQR(shared.F[threadIdx.x]);
__syncthreads();
SUM512(shared.tmp,threadIdx.x);
SUM512(shared.tmp,threadIdx.x,+=);
__syncthreads();
if (threadIdx.x == 0)
shared.f = shared.b = shared.tmp[0];
@@ -457,7 +479,7 @@ extern "C" __global__ void cudaComputeLPCLattice512(
// reff = F(order+1:frameSize) * B(1:frameSize-order)' / DEN
shared.tmp[threadIdx.x] = (threadIdx.x + order < frameSize) * shared.F[threadIdx.x + order] * shared.B[threadIdx.x];
__syncthreads();
SUM512(shared.tmp, threadIdx.x);
SUM512(shared.tmp, threadIdx.x,+=);
__syncthreads();
//float reff = shared.tmp[0] * rsqrtf(shared.b * shared.f); // Geometric lattice
@@ -487,7 +509,7 @@ extern "C" __global__ void cudaComputeLPCLattice512(
// f = F(order+1:frameSize) * F(order+1:frameSize)'
shared.tmp[threadIdx.x] = (threadIdx.x < frameSize - order) * FSQR(shared.F[threadIdx.x + order]);
__syncthreads();
SUM512(shared.tmp, threadIdx.x);
SUM512(shared.tmp, threadIdx.x,+=);
__syncthreads();
if (threadIdx.x == 0)
shared.f = shared.tmp[0];
@@ -495,7 +517,7 @@ extern "C" __global__ void cudaComputeLPCLattice512(
shared.tmp[threadIdx.x] = (threadIdx.x < frameSize - order) * FSQR(shared.B[threadIdx.x]);
__syncthreads();
SUM512(shared.tmp, threadIdx.x);
SUM512(shared.tmp, threadIdx.x,+=);
__syncthreads();
if (threadIdx.x == 0)
shared.b = shared.tmp[0];
@@ -528,7 +550,7 @@ extern "C" __global__ void cudaComputeLPCLattice512(
tasks[taskNo].coefs[cn] = coef;
if (cn == 0)
tasks[taskNo].shift = sh;
shared.bits[threadIdx.x] = 33 - max(__clz(coef),__clz(-1 ^ coef));
shared.bits[threadIdx.x] = __mul24(33 - max(__clz(coef),__clz(-1 ^ coef)), cn < order);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 16]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 8]);
shared.bits[threadIdx.x] = max(shared.bits[threadIdx.x], shared.bits[threadIdx.x + 4]);

View File

@@ -1,6 +1,301 @@
architecture {sm_10}
abiversion {1}
modname {cubin}
code {
name = cudaComputeLPCLattice
lmem = 0
smem = 5876
reg = 10
bar = 1
const {
segname = const
segnum = 1
offset = 0
bytes = 56
mem {
0x0000002f 0x000000ff 0x0000007f 0x0000003f
0x0000001f 0x00000001 0x7e800000 0x0000000a
0x00000003 0x0000000f 0x00000400 0x00000020
0x0000009e 0x3e800000
}
}
bincode {
0xa0000009 0x04000780 0x308005fd 0x644107c8
0xa0010003 0x00000000 0x3002040d 0xc4100780
0x10010003 0x00000280 0x1000ca01 0x0423c780
0x40014e05 0x00200780 0x30100205 0xc4100780
0x60004e01 0x00204780 0x30070005 0xc4100780
0x30060001 0xc4100780 0x20008200 0x2100e800
0x20000601 0x04000780 0xd00e0001 0x80c00780
0x00000605 0xc0000780 0x04001201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x3002cffd 0x6420c7c8 0x2000d401 0x04208500
0x30020001 0xc4100500 0x2000cc01 0x04200500
0xd00e0001 0x80c00500 0x1000f801 0x0403c280
0x00000605 0xc0000780 0x20008411 0x00000013
0x04047201 0xe4200780 0x3004cffd 0x6420c7c8
0x04007201 0xe4200780 0x2000d401 0x04208500
0x30020001 0xc4100500 0x2000cc01 0x04200500
0x21000001 0x04428500 0xd00e0001 0x80c00500
0x1000f801 0x0403c280 0x00000605 0xc0000780
0x04067201 0xe4200780 0x04027201 0xe4200780
0x861ffe03 0x00000000 0x30810401 0x6440c7c0
0xa00001fd 0x0c0147f8 0x00000005 0x20003780
0x00000605 0xc0000680 0xd409c80d 0x20000680
0xd401c809 0x20000680 0x1c00c001 0x0423c680
0xd800c001 0x04204680 0x04007201 0xe4200680
0x861ffe03 0x00000000 0x30820401 0x6440c7c0
0xa00001fd 0x0c0147f8 0x00000019 0x20003780
0x00000605 0xc0000680 0xd405c80d 0x20000680
0xd401c809 0x20000680 0x1c00c001 0x0423c680
0xd800c001 0x04204680 0x04007201 0xe4200680
0x861ffe03 0x00000000 0x30830401 0x6440c7d0
0xa00001fd 0x0c0147c8 0x00000605 0xc0001680
0xd403c80d 0x20001680 0xd401c809 0x20001680
0x1c00c001 0x0423d680 0xd800c001 0x04205680
0x04007201 0xe4201680 0x861ffe03 0x00000000
0x30840401 0x6440c7e0 0xa00001fd 0x0c0147d8
0x00000605 0xc0002680 0xd402c80d 0x20002680
0xd401c809 0x20002680 0x1c00c001 0x0423e680
0xd800c001 0x04206680 0x04007201 0xe4202680
0x861ffe03 0x00000000 0x00000605 0xc0001680
0xd401c809 0x20001680 0x1800e001 0x0423d680
0xd800c001 0x04205680 0x04007201 0xe4201680
0x00000605 0xc0000780 0xd401c809 0x20000780
0x1800d001 0x0423c780 0xd800c001 0x04204780
0x04007201 0xe4200780 0x1800c801 0x0423c780
0xd800c001 0x04204780 0x04007201 0xe4200780
0x1800c401 0x0423c780 0xd800c001 0x04204780
0x04007201 0xe4200780 0x1800c201 0x0423c780
0xd800c001 0x04204780 0x04007201 0xe4200780
0xa006e003 0x00000000 0x307c0401 0x640087f0
0xa00001fd 0x0c0147e8 0x1006e003 0x00003100
0xd001c805 0x20000780 0x3500e001 0x00000003
0xd400c001 0x042007f0 0xa0000001 0x44067680
0x30170001 0xec103680 0x31000001 0x04433680
0x10001601 0x2440f100 0x301f8001 0x00000003
0x307c0001 0x8c000780 0x00002801 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x00000605 0xc0000780 0xd411c809 0x20000780
0x381fc001 0xec300780 0xd800c001 0x04208780
0x04007201 0xe4200780 0xd419c809 0x20000780
0x381fc001 0xec300780 0xd800c001 0x04208780
0x04027201 0xe4200780 0x00000201 0xa00007f0
0x00000605 0xc0003680 0xd409c80d 0x20003680
0xd401c809 0x20003680 0x1c00c001 0x0423f680
0xd800c001 0x04207680 0x04007201 0xe4203680
0x861ffe03 0x00000000 0x00000c01 0xa00007f0
0x00000605 0xc0003680 0xd405c80d 0x20003680
0xd401c809 0x20003680 0x1c00c001 0x0423f680
0xd800c001 0x04207680 0x04007201 0xe4203680
0x861ffe03 0x00000000 0x00000605 0xc0000680
0xd403c80d 0x20000680 0xd401c809 0x20000680
0x1c00c001 0x0423c680 0xd800c001 0x04204680
0x04007201 0xe4200680 0x861ffe03 0x00000000
0x00000605 0xc0001680 0xd402c80d 0x20001680
0xd401c809 0x20001680 0x1c00c001 0x0423d680
0xd800c001 0x04205680 0x04007201 0xe4201680
0x861ffe03 0x00000000 0x00000605 0xc0001680
0xd401c809 0x20001680 0x1800e001 0x0423d680
0xd800c001 0x04205680 0x04007201 0xe4201680
0x00000605 0xc0000780 0xd401c809 0x20000780
0x1800d001 0x0423c780 0xd800c001 0x04204780
0x04007201 0xe4200780 0x1800c801 0x0423c780
0xd800c001 0x04204780 0x04007201 0xe4200780
0x1800c401 0x0423c780 0xd800c001 0x04204780
0x04007201 0xe4200780 0x1800c201 0x0423c780
0xd800c001 0x04204780 0x04007201 0xe4200780
0xa00b6003 0x00000000 0x100b6003 0x00002100
0xd001c805 0x20000780 0x347cc1fd 0x6c2087f8
0xd001c805 0x20000780 0xa400c001 0x44267500
0x30170001 0xec103500 0x31000001 0x04433500
0x10001601 0x2440f280 0x2000e801 0x04200780
0x30208001 0x00000003 0x000b7801 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x00000605 0xc0000780 0x1000e801 0x0423c780
0xd411c809 0x20000780 0x3800c001 0xec200780
0xa0000001 0x44014780 0x04007201 0xe4200780
0xd419c809 0x20000780 0x1000e801 0x0423c780
0x3800c001 0xec200780 0xa0000001 0x44014780
0x04027201 0xe4200780 0xd401c809 0x20000780
0x1800c001 0x0423c780 0x04047201 0xe4200780
0xd409c809 0x20000780 0x1800c001 0x0423c780
0x04067201 0xe4200780 0x861ffe03 0x00000000
0x3085d1fd 0x6c6047f8 0x10217003 0x00003280
0x10018015 0x00000003 0x20000a01 0x04008780
0x00020005 0xc0000780 0xd409c809 0x20000780
0x20000a1d 0x04010780 0x3000cffd 0x6420c7f8
0x00000021 0x20003780 0x1800c005 0x0423c780
0x3007cffd 0x6420c7f8 0x1000f805 0x0403f280
0xd401c809 0x20000780 0x1800c001 0x0423c780
0x00001001 0xa00007f0 0x1000f801 0x0403f280
0xc001021d 0x00000780 0x00000609 0xc0000780
0xe000001d 0x0001c780 0x08087201 0xe421c780
0xd819c80d 0x20000780 0x1d00e020 0x2145ee1c
0xcc08c025 0x00200780 0x30040ffd 0x6400c7f8
0x1000f825 0x0403f280 0xd811c80d 0x20000780
0x1c00c021 0x0423c780 0xec08c021 0x00224780
0x30020ffd 0x6400c7f8 0x10001221 0x0403f280
0xd821c80d 0x20000780 0xbc00c021 0x00220780
0x08087201 0xe4220780 0x861ffe03 0x00000000
0x00000c01 0xa00007f0 0x00000609 0xc0003680
0xd825c811 0x20003680 0xd821c80d 0x20003680
0x1000c021 0x0423f684 0xbc00c021 0x00223680
0x08087201 0xe4223680 0x861ffe03 0x00000000
0x00000609 0xc0000680 0xd823c811 0x20000680
0xd821c80d 0x20000680 0x1000c021 0x0423c684
0xbc00c021 0x00220680 0x08087201 0xe4220680
0x861ffe03 0x00000000 0x00000609 0xc0001680
0xd822c811 0x20001680 0xd821c80d 0x20001680
0x1000c021 0x0423d684 0xbc00c021 0x00221680
0x08087201 0xe4221680 0x861ffe03 0x00000000
0x00000609 0xc0001680 0xd821c80d 0x20001680
0x1c00e021 0x0423d680 0xbc00c021 0x00221680
0x08087201 0xe4221680 0x00000609 0xc0000780
0xd821c80d 0x20000780 0x1d00f020 0xbd086020
0x08087201 0xe4220780 0x1d00e820 0xbd086020
0x08087201 0xe4220780 0x1d00e420 0xbd086020
0x08087201 0xe4220780 0x1d00e220 0xbd086020
0x08087201 0xe4220780 0xa011f003 0x00000000
0x1011f003 0x00002100 0xd021c809 0x20000780
0xc9006021 0x03f00003 0x000b7401 0xe4220780
0x20018e21 0x00000003 0xa0001025 0x44014780
0xc9006021 0x03f00003 0xb08613fd 0x605107f8
0xc08d1021 0x00403680 0xc08d1225 0x00403680
0x90001224 0xc0091020 0x00020a09 0xc0000780
0x080b3001 0xe4220780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x00000609 0xc0000780
0xd819c80d 0x20000780 0xd811c811 0x20000780
0xcc01c021 0x00200780 0xe000c021 0x00220784
0x08087201 0xe4220780 0x861ffe03 0x00000000
0x00000c01 0xa00007f0 0x00000609 0xc0003680
0xd825c811 0x20003680 0xd821c80d 0x20003680
0x1000c021 0x0423f684 0xbc00c021 0x00223680
0x08087201 0xe4223680 0x861ffe03 0x00000000
0x00000609 0xc0000680 0xd823c811 0x20000680
0xd821c80d 0x20000680 0x1000c021 0x0423c684
0xbc00c021 0x00220680 0x08087201 0xe4220680
0x861ffe03 0x00000000 0x00000609 0xc0001680
0xd822c811 0x20001680 0xd821c80d 0x20001680
0x1000c021 0x0423d684 0xbc00c021 0x00221680
0x08087201 0xe4221680 0x861ffe03 0x00000000
0x00000609 0xc0001680 0xd821c80d 0x20001680
0x1c00e021 0x0423d680 0xbc00c021 0x00221680
0x08087201 0xe4221680 0x00000609 0xc0000780
0xd821c80d 0x20000780 0x1d00f020 0xbd086020
0x08087201 0xe4220780 0x1d00e820 0xbd086020
0x08087201 0xe4220780 0x1d00e420 0xbd086020
0x08087201 0xe4220780 0x1d00e220 0xbd086020
0x08087201 0xe4220780 0xa0157003 0x00000000
0x10157003 0x00002100 0xd021c809 0x20000780
0xd02dd00d 0x20000780 0x1900e020 0x1d00e024
0xbc86c1fd 0x607107f8 0xc08d1021 0x00403680
0xc08d1225 0x00403680 0x90001224 0xc0091020
0x000b7601 0xe4220780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0160003 0x00000000
0x10160003 0x00002100 0xd02dd80d 0x20000780
0x00020a09 0xc0000780 0x1d00e020 0x1d00e024
0x080ab001 0xe4220780 0x080a7001 0xe4224780
0xf0000001 0xe0000002 0x203f8a21 0x0fffffff
0x300805fd 0x640187f8 0xa016f003 0x00000000
0x1016f003 0x00003280 0x20400a21 0x04008780
0x00021011 0xc0000780 0xd02dd80d 0x20000780
0x00000609 0xc0000780 0x1c00c025 0x0423c780
0xd829c80d 0x20000780 0xd029b811 0x20000784
0x1c00c021 0x0423c780 0xe009c021 0x04220784
0x080a7201 0xe4220780 0xf0000001 0xe0000002
0xa017e003 0x00000000 0x30020ffd 0x6400c7f8
0x1017e003 0x00003280 0xd401c811 0x20000780
0x0000060d 0xc0000780 0xd02dd809 0x20000780
0x1000c025 0x0423c784 0xdc11c811 0x20000780
0x1800c021 0x0423c780 0xe008c021 0x04224784
0x04007201 0xe4220780 0x1000c021 0x0423c784
0xe800c001 0x04220780 0x0c047201 0xe4200780
0xf0000001 0xe0000002 0xa018d003 0x00000000
0x30040ffd 0x6400c7f8 0x1018d003 0x00003280
0xd409c811 0x20000780 0x0000060d 0xc0000780
0xd02dd809 0x20000780 0x1000c01d 0x0423c784
0xdc19c811 0x20000780 0x1800c001 0x0423c780
0xe000c001 0x0421c784 0x04027201 0xe4200780
0x1000c001 0x0423c784 0xe801c001 0x04200780
0x0c067201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0212003 0x00000000
0x10212003 0x00001100 0x00000605 0xc0000780
0xd429c809 0x20000780 0xc9006005 0x04700003
0x30020a01 0x640107f0 0x0000001d 0x20003780
0xd0850021 0x04400780 0xa0000201 0x8c004780
0x40001001 0x00018780 0x301f0005 0xec100780
0x040af201 0xe4200780 0xd0010001 0x04008780
0xd42bd009 0x20000780 0x040af201 0xe4200780
0xd800de01 0x04204780 0x040af201 0xe4200780
0xd800ce01 0x04204780 0x040af201 0xe4200780
0xd800c601 0x04204780 0x040af201 0xe4200780
0xd800c201 0x04204780 0x040af201 0xe4200780
0xd800c001 0x04204780 0x040af201 0xe4200780
0xd02de005 0x20000780 0x3487c001 0xac602680
0x30880001 0x8c402680 0x00001801 0xe4202680
0x1000d801 0x0423e100 0xa01bf003 0x00000000
0x101be003 0x00002100 0xd02bc805 0x20000780
0x347cc1fd 0x6c2087f8 0x101b7003 0x00003280
0xd02bc805 0x20000780 0xa400c005 0x44264780
0x30170205 0xec100780 0x301e8205 0x0000000b
0x101b8003 0x00000780 0x10208005 0x00000003
0x20000205 0x04000780 0x202e8205 0x0fffffff
0x30890205 0xac400780 0x307c0205 0x8c000780
0x00001601 0xe4204780 0x101bf003 0x00000780
0x1000d605 0x0423c780 0xf0000001 0xe0000002
0x10018025 0x00000003 0x30011205 0xc4000780
0x00000605 0xc0000780 0x203f8001 0x0fffffff
0xd429c809 0x20000780 0xa0000205 0x44014780
0x30001201 0xc4000780 0xc801c025 0x00200780
0x203f8005 0x0fffffff 0xa0001225 0x8c004780
0x30090205 0xac000780 0x30008001 0x00000003
0x30010001 0x8c000780 0x301f0005 0xec100780
0x04003201 0xe4200780 0xd0010001 0x04008780
0x40080001 0x00018780 0xd42bd009 0x20000780
0x040af201 0xe4200780 0xd800de01 0x04204780
0x040af201 0xe4200780 0xd800ce01 0x04204780
0x040af201 0xe4200780 0xd800c601 0x04204780
0x040af201 0xe4200780 0xd800c201 0x04204780
0x040af201 0xe4200780 0xd800c001 0x04204780
0x040af201 0xe4200780 0xa0202003 0x00000000
0x10202003 0x00002100 0xd02bc805 0x20000780
0x347cc1fd 0x6c2087f8 0xd02bc805 0x20000780
0xa400c001 0x44267500 0x30170001 0xec103500
0x31000001 0x04433500 0x10001601 0x2440f280
0x30218001 0x00000003 0x00001801 0xe4200780
0x10202003 0x00002100 0x1000ca01 0x0423c780
0x40014e05 0x00200780 0x30100205 0xc4100780
0x60004e01 0x00204780 0x20000001 0x04014780
0x203f8001 0x0fffffff 0x30070005 0xc4100780
0x30060001 0xc4100780 0x20008200 0x2100e804
0x1000d601 0x0423c780 0x20088205 0x00000003
0xd00e0201 0xa0c00780 0x10202003 0x00002100
0x1000ca01 0x0423c780 0x40014e05 0x00200780
0x30100205 0xc4100780 0x60004e01 0x00204780
0x20000001 0x04014780 0x203f8001 0x0fffffff
0x30070005 0xc4100780 0x30060001 0xc4100780
0x20008200 0x2100e804 0x1000d801 0x0423c780
0x200c8205 0x00000003 0xd00e0201 0xa0c00780
0x00000e01 0xa00007f2 0x10212003 0x00003100
0x20428a04 0x1100ea00 0x00020205 0xc0000780
0x40014e05 0x00200780 0x30100205 0xc4100780
0x60004e01 0x00204780 0x20000001 0x04014780
0x203f8001 0x0fffffff 0x30070005 0xc4100780
0x30060001 0xc4100780 0x20008200 0x2100e800
0x20000605 0x04000780 0x1400f001 0x0423c780
0x20008205 0x00000007 0xd00e0201 0xa0c00780
0xf0000001 0xe0000002 0x20018a15 0x00000003
0x2101f001 0x00000003 0x300501fd 0x6c0147f8
0x100cd003 0x00003280 0x3002cbfd 0x6420c7c8
0x30000003 0x00000280 0x1000ca01 0x0423c780
0x40014e05 0x00200780 0x30100205 0xc4100780
0x60004e01 0x00204780 0x20000001 0x04008780
0x30070005 0xc4100780 0x30060001 0xc4100780
0x20008200 0x2100e804 0x1000e801 0x0423c780
0x202c8205 0x00000003 0xd00e0201 0xa0c00781
}
}
code {
name = cudaComputeAutocor
lmem = 0
@@ -582,21 +877,328 @@ code {
}
}
code {
name = cudaComputeLPC
name = cudaComputeLPCLattice512
lmem = 0
smem = 1260
smem = 12780
reg = 10
bar = 1
const {
segname = const
segnum = 1
offset = 0
bytes = 52
bytes = 56
mem {
0x0000002f 0x000000ff 0x0000007f 0x0000003f
0x0000001f 0x00000001 0x00000020 0x7e800000
0x00000008 0x00000002 0x00000009 0x0000000f
0x3e800000 0x0000009e
}
}
bincode {
0xa000000d 0x04000780 0x308007fd 0x644107c8
0xa0010003 0x00000000 0x30020609 0xc4100780
0x10010003 0x00000280 0x1000ca01 0x0423c780
0x40014e05 0x00200780 0x30100205 0xc4100780
0x60004e01 0x00204780 0x30070005 0xc4100780
0x30060001 0xc4100780 0x20008200 0x2100e800
0x20000401 0x04000780 0xd00e0001 0x80c00780
0x00000405 0xc0000780 0x04001201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x3003cffd 0x6420c7c8 0xa001e003 0x00000000
0x1001d003 0x00000280 0x2000d401 0x0420c780
0x30020001 0xc4100780 0x2000cc01 0x04200780
0xd00e0005 0x80c00780 0x1000e801 0x0423c780
0x30000201 0xec000780 0xa0000001 0x44014780
0x1001e003 0x00000780 0x1000f801 0x0403c780
0x30020609 0xc4100782 0x00000405 0xc0000780
0x04007201 0xe4200780 0x04047201 0xe4200780
0x861ffe03 0x00000000 0x00000405 0xc0000780
0xd401c809 0x20000780 0x1900e000 0xc9006000
0x04107201 0xe4200780 0x861ffe03 0x00000000
0x30810601 0x6440c7c0 0xa00001fd 0x0c0147f8
0x0000001d 0x20003780 0x00000405 0xc0000680
0xd449c80d 0x20000680 0xd441c809 0x20000680
0x1c00c001 0x0423c680 0xb800c001 0x00200680
0x04107201 0xe4200680 0x861ffe03 0x00000000
0x30820601 0x6440c7d0 0xa00001fd 0x0c0147c8
0xa003c003 0x00000000 0x1003c003 0x00001100
0x30020609 0xc4100780 0x00000405 0xc0000780
0xd445c80d 0x20000780 0xd441c809 0x20000780
0x1d00e000 0xb9006000 0x04107201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x30830601 0x6440c7e0 0xa0048003 0x00000000
0xa00001fd 0x0c0147d8 0x10048003 0x00002100
0x30020609 0xc4100780 0x00000405 0xc0000780
0xd443c80d 0x20000780 0xd441c809 0x20000780
0x1d00e000 0xb9006000 0x04107201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa0056003 0x00000000 0x30840601 0x6440c7f0
0x00000005 0x20003780 0xa00001fd 0x0c0147e8
0x00000201 0xa00007f0 0x10056003 0x00003100
0x30020609 0xc4100780 0x00000405 0xc0000780
0xd442c80d 0x20000780 0xd441c809 0x20000780
0x1d00e000 0xb9006000 0x04107201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x30020609 0xc4102680 0x00000405 0xc0002680
0xd441c809 0x20002680 0x1800e001 0x0423e680
0xb800c001 0x00202680 0x04107201 0xe4202680
0x30020609 0xc4100780 0x00000405 0xc0000780
0xd441c809 0x20000780 0x1900f000 0xb9006000
0x04107201 0xe4200780 0x1900e800 0xb9006000
0x04107201 0xe4200780 0x1900e400 0xb9006000
0x04107201 0xe4200780 0x1900e200 0xb9006000
0x04107201 0xe4200780 0x861ffe03 0x00000000
0x307c0601 0x640087f0 0x00000005 0x20003780
0xa00001fd 0x0c0147f8 0x00000015 0x20003780
0x00000201 0xa00007f0 0xd041c805 0x20000780
0x1400c001 0x0423f680 0x1400c005 0x0423f680
0x0018f401 0xe4203680 0x0018f201 0xe4207680
0x861ffe03 0x00000000 0x3085d1fd 0x6c6047f8
0x101a7003 0x00003280 0x10018011 0x00000003
0x308607fd 0x644087f8 0x00000019 0x20003780
0x30020609 0xc4100780 0x00000409 0xc0000780
0x20000805 0x0400c780 0xd811c80d 0x20000780
0x00020205 0xc0000780 0x1c00c001 0x0423c780
0xd401c80d 0x20000780 0xcc00c001 0x00200780
0x3001cffd 0x6420c7f8 0x00000005 0x20003780
0x1000f801 0x0403f280 0x08107201 0xe4200780
0x861ffe03 0x00000000 0xa0091003 0x00000000
0x00000e01 0xa00007f0 0x10091003 0x00003100
0x30020609 0xc4100780 0x00000409 0xc0000780
0xd849c811 0x20000780 0xd841c80d 0x20000780
0x1000c001 0x0423c784 0xbc00c001 0x00200780
0x08107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa009c003 0x00000000
0x1009c003 0x00000100 0x30020609 0xc4100780
0x00000409 0xc0000780 0xd845c811 0x20000780
0xd841c80d 0x20000780 0x1000c001 0x0423c784
0xbc00c001 0x00200780 0x08107201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0xa00a7003 0x00000000 0x100a7003 0x00001100
0x30020609 0xc4100780 0x00000409 0xc0000780
0xd843c811 0x20000780 0xd841c80d 0x20000780
0x1000c001 0x0423c784 0xbc00c001 0x00200780
0x08107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa00b2003 0x00000000
0x100b2003 0x00002100 0x30020609 0xc4100780
0x00000409 0xc0000780 0xd842c811 0x20000780
0xd841c80d 0x20000780 0x1000c001 0x0423c784
0xbc00c001 0x00200780 0x08107201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x30020609 0xc4102680 0x00000409 0xc0002680
0xd841c80d 0x20002680 0x1c00e001 0x0423e680
0xbc00c001 0x00202680 0x08107201 0xe4202680
0x30020609 0xc4100780 0x00000409 0xc0000780
0xd841c80d 0x20000780 0x1d00f000 0xbd006000
0x08107201 0xe4200780 0x1d00e800 0xbd006000
0x08107201 0xe4200780 0x1d00e400 0xbd006000
0x08107201 0xe4200780 0x1d00e200 0xbd006000
0x08107201 0xe4200780 0x861ffe03 0x00000000
0xd063c80d 0x20000780 0xd041c809 0x20000780
0x1d00e200 0x1900e004 0xbc00c001 0x00200780
0x861ffe03 0x00000000 0xa00d7003 0x00000000
0x00000c01 0xa00007f0 0x100d7003 0x00003100
0x00020809 0xc0000780 0xb0010220 0x10008008
0xb08701fd 0x605107f8 0x00000025 0x20003780
0xc08c1021 0x00403680 0xc08c0409 0x00403680
0x90000408 0xc0021008 0x0814b001 0xe4208780
0x08147001 0xe4208780 0xf0000001 0xe0000002
0xa00ed003 0x00000000 0x100ed003 0x00002100
0xb0010220 0x10008008 0xb08701fd 0x605107f8
0x00000025 0x20003780 0xc08c1021 0x00403680
0xc08c0409 0x00403680 0x90000408 0xc0021020
0x30000609 0x04010780 0x00020409 0xc0000780
0xd851b809 0x20000780 0x203f8809 0x0fffffff
0xc808c021 0x0020c780 0x300207fd 0x640187f8
0x00000009 0x20003780 0x1000f821 0x0403f280
0x30020609 0xc4100780 0x00000409 0xc0000780
0xd851c80d 0x20000780 0xbc00c009 0x08220780
0x08147201 0xe4208780 0x2040ce21 0x04210782
0xa0101003 0x00000000 0x300311fd 0x6400c7f8
0x00000009 0x20003780 0x10101003 0x00003280
0xd401c809 0x20000780 0xb0010208 0x1900e004
0xb08701fd 0x605107f8 0x00000025 0x20003780
0xc08c0409 0x00403680 0xc08c0001 0x00403680
0x90000000 0xc0000424 0x30020609 0xc4100780
0x00000409 0xc0000780 0xd811c80d 0x20000780
0xec09c001 0x04204780 0x04007201 0xe4200780
0x1c00c001 0x0423c780 0xe0011201 0x04000780
0x08047201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xd401c805 0x20000780
0x1500e000 0xc5006000 0x30031005 0x6400c7f0
0x00000009 0x20003780 0x1000f801 0x0403f280
0x30020609 0xc4100780 0x00000405 0xc0000780
0x04107201 0xe4200780 0xa00003fd 0x0c0147f8
0x00000005 0x20003780 0x861ffe03 0x00000000
0xa0117003 0x00000000 0x00000e01 0xa00007f0
0x10117003 0x00003100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd449c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0121003 0x00000000
0x10121003 0x00000100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd445c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa012b003 0x00000000
0x1012b003 0x00001100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd443c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0135003 0x00000000
0x10135003 0x00002100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd442c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x30020609 0xc4102680
0x00000405 0xc0002680 0xd441c809 0x20002680
0x1800e001 0x0423e680 0xb800c001 0x00202680
0x04107201 0xe4202680 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd441c809 0x20000780
0x1900f000 0xb9006000 0x04107201 0xe4200780
0x1900e800 0xb9006000 0x04107201 0xe4200780
0x1900e400 0xb9006000 0x04107201 0xe4200780
0x1900e200 0xb9006000 0x04107201 0xe4200780
0x861ffe03 0x00000000 0x00000a01 0xa00007f0
0xd041c805 0x20000780 0x1400c001 0x0423f680
0x0018f201 0xe4203680 0x861ffe03 0x00000000
0x30020609 0xc4100780 0x00000405 0xc0000780
0xd411c809 0x20000780 0x1900e000 0xc9006000
0x00000201 0xa00007f0 0x1000f801 0x0403f280
0x04107201 0xe4200780 0x861ffe03 0x00000000
0xa015f003 0x00000000 0x00000e01 0xa00007f0
0x1015f003 0x00003100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd449c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0169003 0x00000000
0x10169003 0x00000100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd445c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0173003 0x00000000
0x10173003 0x00001100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd443c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa017d003 0x00000000
0x1017d003 0x00002100 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd442c80d 0x20000780
0xd441c809 0x20000780 0x1d00e000 0xb9006000
0x04107201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0x30020609 0xc4102680
0x00000405 0xc0002680 0xd441c809 0x20002680
0x1800e001 0x0423e680 0xb800c001 0x00202680
0x04107201 0xe4202680 0x30020609 0xc4100780
0x00000405 0xc0000780 0xd441c809 0x20000780
0x1900f000 0xb9006000 0x04107201 0xe4200780
0x1900e800 0xb9006000 0x04107201 0xe4200780
0x1900e400 0xb9006000 0x04107201 0xe4200780
0x1900e200 0xb9006000 0x04107201 0xe4200780
0x861ffe03 0x00000000 0x00000a01 0xa00007f0
0xd041c805 0x20000780 0x1400c001 0x0423f680
0x0018f401 0xe4203680 0x861ffe03 0x00000000
0xa01a0003 0x00000000 0x101a0003 0x00002100
0x30070801 0xc4100780 0x30020609 0xc4100780
0x00000405 0xc0000780 0x20000401 0x04000780
0xd451c809 0x20000780 0x00000005 0xc0000780
0x1800c001 0x0423c780 0x04083201 0xe4200780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
0x20018811 0x00000003 0x2101f001 0x00000003
0x300401fd 0x6c0147f8 0x00000001 0x20003780
0x1007a003 0x00003280 0x30050601 0xe4100780
0x20018005 0x00000003 0x3001d1fd 0x6c2047c8
0x30000003 0x00000280 0xd0840611 0x04400780
0x20400601 0x04010780 0x00020005 0xc0000780
0x30070215 0xc4100780 0x307c09fd 0x6c0087c8
0x30020819 0xc4100780 0x30020609 0xc4100780
0x3089e801 0xac600780 0x308803fd 0x6c4107d8
0x300a800d 0x00000003 0x3100000d 0x04429280
0x20000a01 0x04018780 0x00000009 0xc0000780
0xd820c809 0x20000780 0xa800c001 0xc4304780
0xc0000001 0x04700003 0xa0000001 0x8c0047d0
0xa0000001 0x44065680 0x30170001 0xec101680
0x31000001 0x04435680 0x10000c01 0x2440d100
0x2000001d 0x0400c780 0x30010801 0x6c0047d0
0x30218e21 0x00000003 0xd085001d 0x04400780
0x00000409 0xc0000780 0x40080e01 0x00018780
0x0814f201 0xe4200780 0xd853c80d 0x20000780
0x1c00e001 0x0423c780 0x3c00c001 0x8c200780
0x0814f201 0xe4200780 0x1c00d001 0x0423c780
0x3c00c001 0x8c200780 0x0814f201 0xe4200780
0x1c00c801 0x0423c780 0x3c00c001 0x8c200780
0x0814f201 0xe4200780 0x1c00c401 0x0423c780
0x3c00c001 0x8c200780 0x0814f201 0xe4200780
0x1c00c201 0x0423c780 0x3c00c001 0x8c200780
0x0814f201 0xe4200780 0x30000801 0x04004780
0x30020001 0xc4100780 0xd453c809 0x20000780
0x20000a21 0x04000780 0x390fe001 0x00000003
0x00001009 0xc0000780 0x308b0001 0xac400780
0x10018025 0x00000003 0x307c0021 0x8c000780
0x30081201 0xc4000780 0xd820c009 0x20000780
0xa0000001 0x44014780 0x3003120d 0xc4000780
0xc800c025 0x00200780 0x203f8601 0x0fffffff
0xa0001225 0x8c004780 0x30090025 0xac000780
0x30008601 0x00000003 0x3009000d 0x8c000780
0xa01f8003 0x00000000 0x101f8003 0x00001100
0x1000ca01 0x0423c780 0x40014e25 0x00200780
0x30101225 0xc4100780 0x60004e01 0x00224780
0x20000001 0x04004780 0x203f8025 0x0fffffff
0x30071201 0xc4100780 0x30061225 0xc4100780
0x20098000 0x2100e800 0x20000c01 0x04000780
0x20008001 0x00000007 0xd00e000d 0xa0c00780
0xf0000001 0xe0000002 0xa0206003 0x00000000
0x10206003 0x00000100 0x1000ca01 0x0423c780
0x40014e25 0x00200780 0x30101225 0xc4100780
0x60004e01 0x00224780 0x20000001 0x04004780
0x203f8025 0x0fffffff 0x30071201 0xc4100780
0x30061225 0xc4100780 0x20098000 0x2100e800
0x20088001 0x00000003 0xd00e0021 0xa0c00780
0x307c07fd 0x6c0087da 0xa0000601 0x44065500
0x30170001 0xec101500 0x31000001 0x04435500
0x10000c01 0x2440d280 0xd003000d 0x0402c780
0x307c07fd 0x6c0087d8 0xa000060d 0x44065500
0x3017060d 0xec101500 0x3100060d 0x04435500
0x10000c0d 0x2440d280 0x30030001 0x8c000780
0x30218001 0x00000003 0x40070001 0x00018780
0x00000409 0xc0000780 0x0814f201 0xe4200780
0xd853c80d 0x20000780 0x1c00e001 0x0423c780
0x3c00c001 0x8c200780 0x0814f201 0xe4200780
0x1c00d001 0x0423c780 0x3c00c001 0x8c200780
0x0814f201 0xe4200780 0x1c00c801 0x0423c780
0x3c00c001 0x8c200780 0x0814f201 0xe4200780
0x1c00c401 0x0423c780 0x3c00c001 0x8c200780
0x0814f201 0xe4200780 0x1c00c201 0x0423c780
0x3c00c001 0x8c200780 0x0814f201 0xe4200780
0xa0235003 0x00000000 0x10235003 0x00000100
0x1000ca01 0x0423c780 0x40014e0d 0x00200780
0x3010060d 0xc4100780 0x60004e01 0x0020c780
0x20000001 0x04004780 0x203f8001 0x0fffffff
0x3007000d 0xc4100780 0x30060001 0xc4100780
0x20000601 0x04000780 0xd453c809 0x20000780
0x2100e80c 0x1900e000 0x200c860d 0x00000003
0xd00e0601 0xa0c00780 0xf0000001 0xe0000002
0x20108205 0x00000003 0x3001d1fd 0x6c2187d8
0x20008a15 0x00000083 0x101b2003 0x00001280
0xf0000001 0xe0000001
}
}
code {
name = cudaComputeLPC
lmem = 0
smem = 1256
reg = 10
bar = 1
const {
segname = const
segnum = 1
offset = 0
bytes = 56
mem {
0x0000000f 0x0000001f 0x0000003f 0x00000040
0x00000001 0x00000020 0x7e800000 0x00000008
0x0000000c 0xfffff000 0x00000fff 0x3e800000
0x0000009e
0x00000900 0x00000480 0x00000240 0x0000000c
0x3e800000 0x0000009e
}
}
bincode {
@@ -648,32 +1250,37 @@ code {
0x04003201 0xe43f0780 0x307ccffd 0x6c20c7c8
0x3002040d 0xc4100780 0x1800c001 0x0423c780
0x30000003 0x00000280 0x307c05fd 0x6c0087c8
0x213fee11 0x0fffffff 0x1000f815 0x0403c780
0xd004d005 0x20000780 0xb08601fd 0x605107d8
0x10000005 0x0403c780 0xa400c019 0xe4204780
0xc08b0c19 0x00401680 0xc08b0205 0x00401680
0x90000204 0xc0010c04 0xd004d005 0x20000780
0xc401c019 0x0020c780 0xb0060000 0x20458818
0x300605fd 0x6c0187d8 0xa0077003 0x00000000
0x10077003 0x00001280 0x00000605 0xc0000780
0xd403d00d 0x20000780 0xd404d809 0x20000780
0xcc01c019 0x0020c780 0xc801c01d 0x0020c780
0xb9066018 0xbd07601c 0x04013401 0xe4218780
0x0400f401 0xe421c780 0x20400a19 0x04008782
0x00020c05 0xc0000780 0x30020bfd 0x6c00c7d8
0xc401f019 0x0020c780 0x1000f819 0x0403d280
0x30020bfd 0x6c0147d8 0xb0000c05 0x00004780
0x10000c05 0x0403d280 0x00000609 0xc0000780
0xb800f205 0x00204780 0x08003201 0xe4204780
0xa800f205 0xc4304780 0xc0000205 0x04700003
0xa0000205 0x8c0047d0 0x2000d619 0x04214780
0xa0000205 0x44065680 0x30170205 0xec101680
0x31000205 0x04431680 0x10000a05 0x2440d100
0x30870bfd 0x6c4107d8 0x100d801d 0x00000003
0x1000101d 0x2440d280 0x20000e05 0x04004780
0x30020a1d 0x6c0187e0 0xd0840e1d 0x04400780
0x30218205 0x00000003 0x40070205 0x00018780
0x00000609 0xc0000780 0x08007201 0xe4204780
0x1000f811 0x0403c780 0xd004d005 0x20000780
0xb08601fd 0x605107d8 0x10000005 0x0403c780
0xa400c015 0xe4204780 0xc08c0a15 0x00401680
0xc08c0205 0x00401680 0x90000204 0xc0010a04
0xd004d005 0x20000780 0xc401c015 0x0020c780
0xb0000001 0x00014780 0x213fee15 0x0fffffff
0x20400a15 0x04010780 0x300505fd 0x6c0187d8
0xa0078003 0x00000000 0x10078003 0x00001280
0x00000605 0xc0000780 0xd403d00d 0x20000780
0xd404d809 0x20000780 0xcc01c015 0x0020c780
0xc801c019 0x0020c780 0xb9056014 0xbd066018
0x04013401 0xe4214780 0x0400f401 0xe4218780
0x20400815 0x04008782 0x00020a05 0xc0000780
0x300209fd 0x6c00c7d8 0xc401f015 0x0020c780
0x1000f815 0x0403d280 0x300209fd 0x6c0147d8
0xb0000a05 0x00004780 0x10000a05 0x0403d280
0x00000609 0xc0000780 0xb800f205 0x00204780
0x08003201 0xe4204780 0x308709fd 0x6c4107d8
0x100d8005 0x00000003 0x3088d815 0x6c60c780
0x10001605 0x2440d280 0xd0840a15 0x04400780
0x20400215 0x04014780 0x3089d805 0x6c60c780
0xd0840205 0x04400780 0x20400a15 0x04004780
0x308ad805 0x6c60c780 0xd0840205 0x04400780
0x20400a1d 0x04004780 0xa800f205 0xc4304780
0xc0000205 0x04700003 0xa0000205 0x8c0047d0
0x2000d615 0x04210780 0xa0000205 0x44065680
0x30170205 0xec101680 0x31000205 0x04435680
0x10000a05 0x2440d100 0x20000219 0x0401c780
0x30020805 0x6c0187d0 0x30218c21 0x00000003
0xd0840219 0x04400780 0x00000609 0xc0000780
0x40080c05 0x00018780 0x08007201 0xe4204780
0xd801c80d 0x20000780 0x1c00e005 0x0423c780
0x3c01c005 0x8c200780 0x08007201 0xe4204780
0x1c00d005 0x0423c780 0x3c01c005 0x8c200780
@@ -683,27 +1290,27 @@ code {
0x08007201 0xe4204780 0x1c00c205 0x0423c780
0x3c01c005 0x8c200780 0x08007201 0xe4204780
0xd001c809 0x20000780 0x390fe005 0x00000003
0x30800205 0xac400780 0x1001801d 0x00000003
0x307c0205 0x8c000780 0x30010e1d 0xc4000780
0xa0000e21 0x44014780 0x103f801d 0x000001ff
0xc408f221 0x00200780 0x1000141d 0x2440d280
0xa0001021 0xac004780 0x30080e21 0xac000780
0x1000801d 0x0ffffe03 0x1000121d 0x2440d280
0x30080e1d 0x8c000780 0xa00bb003 0x00000000
0x100bb003 0x00002100 0x30070c21 0xc4100780
0x30060c25 0xc4100780 0x20099020 0x2108e820
0x20000621 0x04020780 0x20009021 0x00000007
0xd00e101d 0xa0c00780 0xf0000001 0xe0000002
0x30070c21 0xc4100680 0x30060c25 0xc4100680
0x20001021 0x04024680 0x2000c821 0x04220680
0x21001021 0x0441c680 0xd00e1005 0xa0c00680
0x307c0ffd 0x6c0087d8 0xa0000e05 0x44065500
0x30170205 0xec101500 0x31000205 0x04431500
0x10000a05 0x2440d280 0xd007001d 0x0402c780
0x307c0ffd 0x6c0087d8 0xa0000e1d 0x44065500
0x30170e1d 0xec101500 0x31000e1d 0x04431500
0x10000a1d 0x2440d280 0x30070205 0x8c000780
0x00000605 0xc0000780 0x30218205 0x00000003
0x30800205 0xac400780 0x10018021 0x00000003
0x307c0205 0x8c000780 0x30011025 0xc4000780
0xa0001225 0x44014780 0x3007101d 0xc4000780
0xc409f225 0x00200780 0x203f8e21 0x0fffffff
0xa0001225 0xac004780 0x30091021 0xac000780
0x30008e1d 0x00000003 0x30080e1d 0x8c000780
0xa00c4003 0x00000000 0x100c4003 0x00001100
0x30070a21 0xc4100780 0x30060a25 0xc4100780
0x20099020 0x2108e820 0x20000621 0x04020780
0x20009021 0x00000007 0xd00e101d 0xa0c00780
0xf0000001 0xe0000002 0x30070a21 0xc4100680
0x30060a25 0xc4100680 0x20001021 0x04024680
0x2000c821 0x04220680 0x21001021 0x0441c680
0xd00e1005 0xa0c00680 0x307c0ffd 0x6c0087d8
0xa0000e05 0x44065500 0x30170205 0xec101500
0x31000205 0x04435500 0x10000a05 0x2440d280
0xd007001d 0x0402c780 0x307c0ffd 0x6c0087d8
0xa0000e1d 0x44065500 0x30170e1d 0xec101500
0x31000e1d 0x04435500 0x10000a1d 0x2440d280
0x30070205 0x8c000780 0x30218205 0x00000003
0x40060205 0x00018780 0x00000605 0xc0000780
0x04007201 0xe4204780 0xd401c809 0x20000780
0x1800e005 0x0423c780 0x3801c005 0x8c200780
0x04007201 0xe4204780 0x1800d005 0x0423c780
@@ -712,13 +1319,13 @@ code {
0x04007201 0xe4204780 0x1800c405 0x0423c780
0x3801c005 0x8c200780 0x04007201 0xe4204780
0x1800c205 0x0423c780 0x3801c005 0x8c200780
0x04007201 0xe4204780 0xa00ea003 0x00000000
0x100ea003 0x00000100 0x30070c05 0xc4100780
0x30060c19 0xc4100780 0x20000205 0x04018780
0xd001c805 0x20000780 0x2101e818 0x1500e004
0x200c8c19 0x00000003 0xd00e0c05 0xa0c00780
0xf0000001 0xe0000002 0x20018a15 0x00000003
0x3005cffd 0x6c2147d8 0x10062003 0x00001280
0x04007201 0xe4204780 0xa00f4003 0x00000000
0x100f4003 0x00000100 0x30070a05 0xc4100780
0x30060a15 0xc4100780 0x20000205 0x04014780
0xd001c805 0x20000780 0x2101e814 0x1500e004
0x200c8a15 0x00000003 0xd00e0a05 0xa0c00780
0xf0000001 0xe0000002 0x20018811 0x00000003
0x3004cffd 0x6c2147d8 0x10061003 0x00001280
0xf0000001 0xe0000001
}
}