diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs
index 3a3e528..a4fd0bf 100644
--- a/CUETools.FlaCuda/FlaCudaWriter.cs
+++ b/CUETools.FlaCuda/FlaCudaWriter.cs
@@ -557,131 +557,152 @@ namespace CUETools.Codecs.FlaCuda
}
}
- static uint rice_encode_count(uint sum, uint n, uint k)
- {
- return n * (k + 1) + ((sum - (n >> 1)) >> (int)k);
- }
-
- //static unsafe uint find_optimal_rice_param(uint sum, uint n)
- //{
- // uint* nbits = stackalloc uint[Flake.MAX_RICE_PARAM + 1];
- // int k_opt = 0;
-
- // nbits[0] = UINT32_MAX;
- // for (int k = 0; k <= Flake.MAX_RICE_PARAM; k++)
- // {
- // nbits[k] = rice_encode_count(sum, n, (uint)k);
- // if (nbits[k] < nbits[k_opt])
- // k_opt = k;
- // }
- // return (uint)k_opt;
- //}
-
- static unsafe int find_optimal_rice_param(uint sum, uint n, out uint nbits_best)
- {
- int k_opt = 0;
- uint a = n;
- uint b = sum - (n >> 1);
- uint nbits = a + b;
- for (int k = 1; k <= Flake.MAX_RICE_PARAM; k++)
- {
- a += n;
- b >>= 1;
- uint nbits_k = a + b;
- if (nbits_k < nbits)
- {
- k_opt = k;
- nbits = nbits_k;
- }
- }
- nbits_best = nbits;
- return k_opt;
- }
-
- static unsafe uint calc_optimal_rice_params(ref RiceContext rc, int porder, uint* sums, uint n, uint pred_order)
+ static unsafe uint calc_optimal_rice_params(int porder, int* parm, uint* sums, uint n, uint pred_order)
{
uint part = (1U << porder);
- uint all_bits = 0;
- rc.rparams[0] = find_optimal_rice_param(sums[0], (n >> porder) - pred_order, out all_bits);
- uint cnt = (n >> porder);
+ uint cnt = (n >> porder) - pred_order;
+ int k = cnt > 0 ? Math.Min(Flake.MAX_RICE_PARAM, BitReader.log2i(sums[0] / cnt)) : 0;
+ uint all_bits = cnt * ((uint)k + 1U) + (sums[0] >> k);
+ parm[0] = k;
+ cnt = (n >> porder);
for (uint i = 1; i < part; i++)
{
- uint nbits;
- rc.rparams[i] = find_optimal_rice_param(sums[i], cnt, out nbits);
- all_bits += nbits;
+ k = Math.Min(Flake.MAX_RICE_PARAM, BitReader.log2i(sums[i] / cnt));
+ all_bits += cnt * ((uint)k + 1U) + (sums[i] >> k);
+ parm[i] = k;
}
- all_bits += (4 * part);
- rc.porder = porder;
- return all_bits;
+ return all_bits + (4 * part);
}
- static unsafe void calc_sums(int pmin, int pmax, int* data, uint n, uint pred_order, uint* sums)
+ static unsafe void calc_lower_sums(int pmin, int pmax, uint* sums)
{
- // sums for highest level
- int parts = (1 << pmax);
- int* res = data + pred_order;
- uint cnt = (n >> pmax) - pred_order;
- uint sum = 0;
- for (uint j = cnt; j > 0; j--)
- {
- int val = *(res++);
- sum += (uint)((val << 1) ^ (val >> 31));
- }
- sums[pmax * Flake.MAX_PARTITIONS + 0] = sum;
- cnt = (n >> pmax);
- for (int i = 1; i < parts; i++)
- {
- sum = 0;
- for (uint j = cnt; j > 0; j--)
- {
- int val = *(res++);
- sum += (uint)((val << 1) ^ (val >> 31));
- }
- sums[pmax * Flake.MAX_PARTITIONS + i] = sum;
- }
- // sums for lower levels
for (int i = pmax - 1; i >= pmin; i--)
{
- parts = (1 << i);
- for (int j = 0; j < parts; j++)
+ for (int j = 0; j < (1 << i); j++)
{
- sums[i * Flake.MAX_PARTITIONS + j] =
- sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j] +
+ sums[i * Flake.MAX_PARTITIONS + j] =
+ sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j] +
sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j + 1];
}
}
}
- static unsafe uint calc_rice_params(ref RiceContext rc, ref RiceContext tmp_rc, int pmin, int pmax, int* data, uint n, uint pred_order)
+ static unsafe void calc_sums(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums)
{
- //uint* udata = stackalloc uint[(int)n];
+ int parts = (1 << pmax);
+ uint* res = data + pred_order;
+ uint cnt = (n >> pmax) - pred_order;
+ uint sum = 0;
+ for (uint j = cnt; j > 0; j--)
+ sum += *(res++);
+ sums[0] = sum;
+ cnt = (n >> pmax);
+ for (int i = 1; i < parts; i++)
+ {
+ sum = 0;
+ for (uint j = cnt; j > 0; j--)
+ sum += *(res++);
+ sums[i] = sum;
+ }
+ }
+
+ /// <summary>
+ /// Special case of calc_sums for when (n >> pmax) == 18: each 18-value partition is summed by one unrolled expression.
+ /// </summary>
+ /// <param name="pmin">Not referenced by this function.</param>
+ /// <param name="pmax">Partition order; 2^pmax partition sums are written.</param>
+ /// <param name="data">Values to sum; the first pred_order entries are skipped.</param>
+ /// <param name="n">Not referenced by this function; the partition size is hard-coded as 18.</param>
+ /// <param name="pred_order">Number of leading entries of data to skip; the first partition sums 18 - pred_order values.</param>
+ /// <param name="sums">Output: one sum per partition, written to sums[0] .. sums[2^pmax - 1].</param>
+ static unsafe void calc_sums18(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums)
+ {
+ int parts = (1 << pmax);
+ uint* res = data + pred_order;
+ uint cnt = 18 - pred_order; // first partition is shorter by the skipped entries; NOTE(review): wraps if pred_order > 18 - confirm the caller rules this out
+ uint sum = 0;
+ for (uint j = cnt; j > 0; j--)
+ sum += *(res++);
+ sums[0] = sum;
+ for (int i = 1; i < parts; i++)
+ {
+ sums[i] =
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++); // 18 terms in total: res advances by one full partition
+ }
+ }
+
+ /// <summary>
+ /// Special case of calc_sums for when (n >> pmax) == 16: each 16-value partition is summed by one unrolled expression.
+ /// </summary>
+ /// <param name="pmin">Not referenced by this function.</param>
+ /// <param name="pmax">Partition order; 2^pmax partition sums are written.</param>
+ /// <param name="data">Values to sum; the first pred_order entries are skipped.</param>
+ /// <param name="n">Not referenced by this function; the partition size is hard-coded as 16.</param>
+ /// <param name="pred_order">Number of leading entries of data to skip; the first partition sums 16 - pred_order values.</param>
+ /// <param name="sums">Output: one sum per partition, written to sums[0] .. sums[2^pmax - 1].</param>
+ static unsafe void calc_sums16(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums)
+ {
+ int parts = (1 << pmax);
+ uint* res = data + pred_order;
+ uint cnt = 16 - pred_order; // first partition is shorter by the skipped entries; NOTE(review): wraps if pred_order > 16 - confirm the caller rules this out
+ uint sum = 0;
+ for (uint j = cnt; j > 0; j--)
+ sum += *(res++);
+ sums[0] = sum;
+ for (int i = 1; i < parts; i++)
+ {
+ sums[i] =
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++) +
+ *(res++) + *(res++) + *(res++) + *(res++); // 16 terms in total: res advances by one full partition
+ }
+ }
+
+ static unsafe uint calc_rice_params(RiceContext rc, int pmin, int pmax, int* data, uint n, uint pred_order)
+ {
+ uint* udata = stackalloc uint[(int)n];
uint* sums = stackalloc uint[(pmax + 1) * Flake.MAX_PARTITIONS];
+ int* parm = stackalloc int[(pmax + 1) * Flake.MAX_PARTITIONS];
//uint* bits = stackalloc uint[Flake.MAX_PARTITION_ORDER];
//assert(pmin >= 0 && pmin <= Flake.MAX_PARTITION_ORDER);
//assert(pmax >= 0 && pmax <= Flake.MAX_PARTITION_ORDER);
//assert(pmin <= pmax);
- //for (uint i = 0; i < n; i++)
- // udata[i] = (uint) ((2 * data[i]) ^ (data[i] >> 31));
+ for (uint i = 0; i < n; i++)
+ udata[i] = (uint)((data[i] << 1) ^ (data[i] >> 31));
- calc_sums(pmin, pmax, data, n, pred_order, sums);
+ // sums for highest level
+ if ((n >> pmax) == 18)
+ calc_sums18(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS);
+ else if ((n >> pmax) == 16)
+ calc_sums16(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS);
+ else
+ calc_sums(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS);
+ // sums for lower levels
+ calc_lower_sums(pmin, pmax, sums);
- int opt_porder = pmin;
uint opt_bits = AudioSamples.UINT32_MAX;
+ int opt_porder = pmin;
for (int i = pmin; i <= pmax; i++)
{
- uint bits = calc_optimal_rice_params(ref tmp_rc, i, sums + i * Flake.MAX_PARTITIONS, n, pred_order);
+ uint bits = calc_optimal_rice_params(i, parm + i * Flake.MAX_PARTITIONS, sums + i * Flake.MAX_PARTITIONS, n, pred_order);
if (bits <= opt_bits)
{
- opt_porder = i;
opt_bits = bits;
- RiceContext tmp_rc2 = rc;
- rc = tmp_rc;
- tmp_rc = tmp_rc2;
+ opt_porder = i;
}
}
+ rc.porder = opt_porder;
+ fixed (int* rparms = rc.rparams)
+ AudioSamples.MemCpy(rparms, parm + opt_porder * Flake.MAX_PARTITIONS, (1 << opt_porder));
+
return opt_bits;
}
@@ -990,7 +1011,7 @@ namespace CUETools.Codecs.FlaCuda
int pmin = get_max_p_order(eparams.min_partition_order, frame.blocksize, frame.subframes[ch].best.order);
int pmax = get_max_p_order(eparams.max_partition_order, frame.blocksize, frame.subframes[ch].best.order);
uint bits = (uint)frame.subframes[ch].best.order * frame.subframes[ch].obits + 6;
- frame.subframes[ch].best.size = bits + calc_rice_params(ref frame.subframes[ch].best.rc, ref frame.current.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order);
+ frame.subframes[ch].best.size = bits + calc_rice_params(frame.subframes[ch].best.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order);
}
break;
case SubframeType.LPC:
@@ -1009,7 +1030,7 @@ namespace CUETools.Codecs.FlaCuda
int pmax = get_max_p_order(eparams.max_partition_order, frame.blocksize, frame.subframes[ch].best.order);
uint bits = (uint)frame.subframes[ch].best.order * frame.subframes[ch].obits + 4 + 5 + (uint)frame.subframes[ch].best.order * (uint)frame.subframes[ch].best.cbits + 6;
//uint oldsize = frame.subframes[ch].best.size;
- frame.subframes[ch].best.size = bits + calc_rice_params(ref frame.subframes[ch].best.rc, ref frame.current.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order);
+ frame.subframes[ch].best.size = bits + calc_rice_params(frame.subframes[ch].best.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order);
//if (frame.subframes[ch].best.size > frame.subframes[ch].obits * (uint)frame.blocksize &&
// oldsize <= frame.subframes[ch].obits * (uint)frame.blocksize)
// throw new Exception("oops");
@@ -1213,9 +1234,10 @@ namespace CUETools.Codecs.FlaCuda
cuda.SetParameter(task.cudaChooseBestMethod, 0 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer);
cuda.SetParameter(task.cudaChooseBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualOutput.Pointer);
- cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)residualPartCount);
- cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
- cuda.SetParameterSize(task.cudaChooseBestMethod, sizeof(uint) * 4U);
+ cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)residualPartSize);
+ cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)residualPartCount);
+ cuda.SetParameter(task.cudaChooseBestMethod, 4 * sizeof(uint), (uint)task.nResidualTasksPerChannel);
+ cuda.SetParameterSize(task.cudaChooseBestMethod, 5U * sizeof(uint));
cuda.SetFunctionBlockShape(task.cudaChooseBestMethod, 32, 8, 1);
cuda.SetParameter(task.cudaCopyBestMethod, 0, (uint)task.cudaBestResidualTasks.Pointer);
diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu
index 21d7805..a40d99f 100644
--- a/CUETools.FlaCuda/flacuda.cu
+++ b/CUETools.FlaCuda/flacuda.cu
@@ -61,6 +61,7 @@ typedef struct
#define SUM512(buf,tid,op) if (tid < 256) buf[tid] op buf[tid + 256]; __syncthreads(); SUM256(buf,tid,op)
#define FSQR(s) ((s)*(s))
+#define FASTMUL(a,b) __mul24(a,b)
extern "C" __global__ void cudaStereoDecorr(
int *samples,
@@ -581,14 +582,8 @@ extern "C" __global__ void cudaEstimateResidual(
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24];
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] + shared.residual[tid + 6];
- shared.residual[tid] += shared.residual[tid + 1];
-
- // rice parameter search
- shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) *
- (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x));
- shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12]));
if (threadIdx.x == 0)
- output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3]));
+ output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1];
}
extern "C" __global__ void cudaEstimateResidual1(
@@ -612,7 +607,7 @@ extern "C" __global__ void cudaEstimateResidual1(
if (tid == 0)
{
shared.pos = blockIdx.x * partSize;
- shared.dataLen = min(shared.task.data.blocksize - shared.pos, partSize + max_order);
+ shared.dataLen = min(shared.task.data.blocksize - shared.pos, partSize + shared.task.data.residualOrder);
}
__syncthreads();
@@ -631,22 +626,10 @@ extern "C" __global__ void cudaEstimateResidual1(
shared.residual[tid] = __mul24(ptr < shared.dataLen, min(0x7fffff,(sum << 1) ^ (sum >> 31)));
__syncthreads();
SUM256(shared.residual, tid, +=);
-
- if (threadIdx.y == 0)
- {
- const int residualLen = max(0,min(shared.task.data.blocksize - shared.pos - shared.task.data.residualOrder, partSize));
-
- // rice parameter search
- shared.residual[threadIdx.x] = (shared.task.data.type != Constant || shared.residual[0] != 0) *
- (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[0] - (residualLen >> 1)) >> threadIdx.x));
- shared.residual[threadIdx.x] = min(min(shared.residual[threadIdx.x], shared.residual[threadIdx.x + 4]), min(shared.residual[threadIdx.x + 8], shared.residual[threadIdx.x + 12]));
- if (threadIdx.x == 0)
- output[blockIdx.y * 64 + blockIdx.x] = min(min(shared.residual[threadIdx.x], shared.residual[threadIdx.x + 1]), min(shared.residual[threadIdx.x + 2], shared.residual[threadIdx.x + 3]));
- }
+ if (tid == 0)
+ output[blockIdx.y * 64 + blockIdx.x] = shared.residual[0];
}
-#define FASTMUL(a,b) __mul24(a,b)
-
extern "C" __global__ void cudaEstimateResidual8(
int*output,
int*samples,
@@ -656,57 +639,61 @@ extern "C" __global__ void cudaEstimateResidual8(
)
{
__shared__ struct {
- int data[32*9];
+ volatile int data[32*9];
volatile int residual[32*8];
FlaCudaSubframeData task[8];
int coefs[32*8];
+ volatile int pos;
+ volatile int dataLen;
+ volatile int dataOffs;
} shared;
const int tid = threadIdx.x + threadIdx.y * 32;
+ const int taskNo = FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y;
if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int))
- ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[blockIdx.y * blockDim.y + threadIdx.y]))[threadIdx.x];
+ ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[taskNo]))[threadIdx.x];
+ const int ro = shared.task[threadIdx.y].residualOrder;
+ shared.coefs[tid] = threadIdx.x < ro ? tasks[taskNo].coefs[threadIdx.x] : 0;
+ if (tid == 0)
+ {
+ shared.pos = FASTMUL(blockIdx.x, partSize);
+ shared.dataLen = min(shared.task[0].blocksize - shared.pos, partSize + max_order);
+ shared.dataOffs = shared.task[0].samplesOffs + shared.pos;
+ }
__syncthreads();
- const int pos = blockIdx.x * partSize;
- const int dataLen = min(shared.task[0].blocksize - pos, partSize + max_order);
// fetch samples
- shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] >> shared.task[0].wbits : 0;
- if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task[0].samplesOffs + pos + tid + partSize] >> shared.task[0].wbits : 0;
+ if (tid < shared.dataLen)
+ shared.data[tid] = samples[shared.dataOffs + tid] >> shared.task[0].wbits;
+ if (tid + partSize < shared.dataLen)
+ shared.data[tid + partSize] = samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits;
__syncthreads();
- shared.residual[tid] = 0;
- shared.coefs[tid] = threadIdx.x < shared.task[threadIdx.y].residualOrder ? tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0;
-
- const int residualLen = max(0,min(shared.task[0].blocksize - pos - shared.task[threadIdx.y].residualOrder, partSize));
+ const int residualLen = max(0,min(shared.dataLen - ro, partSize));
const int ptr2 = threadIdx.y << 5;
int s = 0;
- for (int ptr = threadIdx.x + blockDim.y * 32 * (shared.task[threadIdx.y].type == Verbatim); ptr < blockDim.y * 32 + threadIdx.x; ptr += 32)
+ for (int ptr = threadIdx.x; ptr < residualLen; ptr += 32)
{
// compute residual
int sum =
__mul24(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) +
__mul24(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) +
__mul24(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) +
- __mul24(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) +
+ __mul24(shared.data[ptr + 3], shared.coefs[ptr2 + 3]);
+ sum +=
__mul24(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) +
__mul24(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) +
__mul24(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) +
__mul24(shared.data[ptr + 7], shared.coefs[ptr2 + 7]);
- sum = shared.data[ptr + shared.task[threadIdx.y].residualOrder] - (sum >> shared.task[threadIdx.y].shift);
- s += __mul24(ptr < residualLen, min(0x7fffff,(sum << 1) ^ (sum >> 31)));
+ sum = shared.data[ptr + ro] - (sum >> shared.task[threadIdx.y].shift);
+ s += min(0x7fffff,(sum << 1) ^ (sum >> 31));
}
shared.residual[tid] = s;
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24];
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] + shared.residual[tid + 6];
- shared.residual[tid] += shared.residual[tid + 1];
-
- // rice parameter search
- shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) *
- (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x));
- shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12]));
if (threadIdx.x == 0)
- output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3]));
+ output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1];
}
extern "C" __global__ void cudaEstimateResidual12(
@@ -727,8 +714,11 @@ extern "C" __global__ void cudaEstimateResidual12(
volatile int dataOffs;
} shared;
const int tid = threadIdx.x + threadIdx.y * 32;
+ const int taskNo = FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y;
if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int))
- ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y]))[threadIdx.x];
+ ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[taskNo]))[threadIdx.x];
+ const int ro = shared.task[threadIdx.y].residualOrder;
+ shared.coefs[tid] = threadIdx.x < ro ? tasks[taskNo].coefs[threadIdx.x] : 0;
if (tid == 0)
{
shared.pos = FASTMUL(blockIdx.x, partSize);
@@ -738,30 +728,30 @@ extern "C" __global__ void cudaEstimateResidual12(
__syncthreads();
// fetch samples
- shared.data[tid] = tid < shared.dataLen ? samples[shared.dataOffs + tid] >> shared.task[0].wbits : 0;
- if (tid < 32) shared.data[tid + partSize] = tid + partSize < shared.dataLen ? samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits : 0;
+ if (tid < shared.dataLen)
+ shared.data[tid] = samples[shared.dataOffs + tid] >> shared.task[0].wbits;
+ if (tid + partSize < shared.dataLen)
+ shared.data[tid + partSize] = samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits;
__syncthreads();
- const int ro = shared.task[threadIdx.y].residualOrder;
- const int residualLen = max(0,min(shared.task[0].blocksize - shared.pos - ro, partSize));
+ int residualLen = max(0,min(shared.dataLen - ro, partSize));
const int ptr2 = threadIdx.y << 5;
-
- shared.coefs[tid] = threadIdx.x < ro ? tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y].coefs[threadIdx.x] : 0;
-
int s = 0;
- for (int ptr = shared.task[threadIdx.y].type == Verbatim ? residualLen : threadIdx.x; ptr < residualLen; ptr += 32)
+ for (int ptr = threadIdx.x; ptr < residualLen; ptr += 32)
{
// compute residual
int sum =
FASTMUL(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) +
FASTMUL(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) +
FASTMUL(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) +
- FASTMUL(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) +
+ FASTMUL(shared.data[ptr + 3], shared.coefs[ptr2 + 3]);
+ sum +=
FASTMUL(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) +
FASTMUL(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) +
FASTMUL(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) +
- FASTMUL(shared.data[ptr + 7], shared.coefs[ptr2 + 7]) +
+ FASTMUL(shared.data[ptr + 7], shared.coefs[ptr2 + 7]);
+ sum +=
FASTMUL(shared.data[ptr + 8], shared.coefs[ptr2 + 8]) +
FASTMUL(shared.data[ptr + 9], shared.coefs[ptr2 + 9]) +
FASTMUL(shared.data[ptr + 10], shared.coefs[ptr2 + 10]) +
@@ -773,19 +763,14 @@ extern "C" __global__ void cudaEstimateResidual12(
shared.residual[tid] = s;
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24];
shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] + shared.residual[tid + 6];
- shared.residual[tid] += shared.residual[tid + 1];
-
- // rice parameter search
- shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) *
- (__mul24(threadIdx.x >= 15, 0x7fffff) + FASTMUL(residualLen, threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x));
- shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12]));
if (threadIdx.x == 0)
- output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3]));
+ output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1];
}
extern "C" __global__ void cudaChooseBestMethod(
FlaCudaSubframeTask *tasks,
int *residual,
+ int partSize,
int partCount, // <= blockDim.y (256)
int taskCount
)
@@ -806,8 +791,18 @@ extern "C" __global__ void cudaChooseBestMethod(
((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(tasks + task + threadIdx.y + taskCount * blockIdx.y))[threadIdx.x];
int sum = 0;
- for (int pos = 0; pos < partCount; pos += blockDim.x)
- sum += (pos + threadIdx.x < partCount ? residual[pos + threadIdx.x + 64 * (task + threadIdx.y + taskCount * blockIdx.y)] : 0);
+ for (int pos = threadIdx.x; pos < partCount; pos += blockDim.x)
+ {
+ // fetch part sum
+ int psum = residual[pos + 64 * (task + threadIdx.y + taskCount * blockIdx.y)];
+ // calculate part size
+ int residualLen = max(0,min(shared.task[threadIdx.y].data.blocksize - FASTMUL(pos, partSize) - shared.task[threadIdx.y].data.residualOrder, partSize));
+ residualLen = FASTMUL(residualLen, shared.task[threadIdx.y].data.type != Constant || psum != 0);
+ // calculate rice parameter
+ int k = max(0, min(14, __float2int_rz(__log2f((psum + 0.000001f) / (residualLen + 0.000001f) + 0.5f))));
+ // calculate part bit length
+ sum += FASTMUL(residualLen, k + 1) + (psum >> k);
+ }
shared.partLen[tid] = sum;
// length sum: reduction in shared mem
diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin
index 11b0cbe..ea93441 100644
--- a/CUETools.FlaCuda/flacuda.cubin
+++ b/CUETools.FlaCuda/flacuda.cubin
@@ -562,128 +562,99 @@ code {
name = cudaEstimateResidual12
lmem = 0
smem = 3760
- reg = 15
+ reg = 13
bar = 1
const {
segname = const
segnum = 1
offset = 0
- bytes = 24
+ bytes = 12
mem {
- 0x000003ff 0x0000000f 0x0000001f 0x00000001
- 0x007fffff 0x0000000e
+ 0x000003ff 0x0000000f 0x007fffff
}
}
bincode {
- 0xd0800205 0x00400780 0xa0000021 0x04000780
- 0xa0000219 0x04000780 0x308111fd 0x644107c8
- 0xa0013003 0x00000000 0x30060c1d 0xc4100780
- 0x10013003 0x00000280 0xa0004e01 0x04200780
- 0xa0004405 0x04200780 0x40418000 0x20008c00
- 0x30070005 0xc4100780 0x30060001 0xc4100780
- 0x20000205 0x04000780 0x30021001 0xc4100780
- 0x2101ec08 0x20008e04 0x20000001 0x04008780
- 0xd00e0001 0x80c00780 0x00000205 0xc0000780
- 0x04045201 0xe4200780 0x30050c01 0xc4100782
- 0x20000025 0x040207c0 0xa0022003 0x00000000
- 0x10022003 0x00000280 0xa0004c01 0x04200780
- 0x4000d001 0x00218780 0x00075201 0xe4200780
- 0xd01d4809 0x20000780 0xd0115005 0x20000780
- 0x1100f000 0x1900e004 0x2100ee08 0x2541ec00
- 0x30020001 0xac000780 0x00075401 0xe4200780
- 0x1900e000 0x2500e000 0x00075601 0xe4200780
+ 0xd0800205 0x00400780 0xa0000019 0x04000780
+ 0xa0000215 0x04000780 0x30810dfd 0x644107c8
+ 0xa0013003 0x00000000 0x30060a01 0xc4100780
+ 0x10013003 0x00000280 0xa0004e05 0x04200780
+ 0xa0004409 0x04200780 0x40428204 0x20018a04
+ 0x30070209 0xc4100780 0x30060205 0xc4100780
+ 0x20000409 0x04004780 0x30020c05 0xc4100780
+ 0x2102ec0c 0x20018008 0x20000205 0x0400c780
+ 0xd00e0205 0x80c00780 0x00000405 0xc0000780
+ 0x04045201 0xe4204780 0x00000005 0xc0000782
+ 0xd4114809 0x20000780 0x3806c1fd 0x6420c7c8
+ 0xa0025003 0x00000000 0x1800c001 0x0423c780
+ 0x10024003 0x00000280 0xa0004e05 0x04200780
+ 0xa0004409 0x04200780 0x40428204 0x20018a04
+ 0x30070209 0xc4100780 0x30060205 0xc4100780
+ 0x20000405 0x04004780 0x30020c09 0xc4100780
+ 0x2101ec04 0x20018404 0x20008205 0x00000007
+ 0xd00e0205 0x80c00780 0x10025003 0x00000780
+ 0x1000f805 0x0403c780 0x30050a09 0xc4100782
+ 0x2000041d 0x040187c0 0x00020e09 0xc0000780
+ 0x08055201 0xe4204780 0xa0036003 0x00000000
+ 0x10036003 0x00000280 0xa0004c05 0x04200780
+ 0x4001d005 0x00218780 0x00075201 0xe4204780
+ 0xd01d480d 0x20000780 0xd0115009 0x20000780
+ 0x1100f004 0x1d00e008 0x2101ee0c 0x2942ec04
+ 0x30030205 0xac000780 0x00075401 0xe4204780
+ 0x1d00e004 0x2901e004 0x00075601 0xe4204780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0xd01d5005 0x20000780 0x3409c1fd 0x6c20c7c8
- 0xa0032003 0x00000000 0x10031003 0x00000280
- 0xd01d5805 0x20000780 0x2400c001 0x04224780
- 0x30020001 0xc4100780 0xd011a005 0x20000780
- 0x2000ca01 0x04200780 0xd00e0005 0x80c00780
- 0x1400c001 0x0423c780 0x30000201 0xec000780
- 0x10032003 0x00000780 0x1000f801 0x0403c780
- 0x308213fd 0x6c4107ca 0x00021209 0xc0000780
- 0x08001201 0xe4200780 0xa0048003 0x00000000
- 0x10048003 0x00000280 0xd01d5005 0x20000780
- 0x2000d001 0x04224780 0x3400c1fd 0x6c20c7c8
- 0x00020005 0xc0000780 0xa0047003 0x00000000
- 0x10046003 0x00000280 0xd01d5809 0x20000780
- 0x2909e000 0x2100f000 0x30020001 0xc4100780
- 0xd011a009 0x20000780 0x2000ca01 0x04200780
- 0xd00e0005 0x80c00780 0x1800c001 0x0423c780
- 0x30000201 0xec000780 0x10047003 0x00000780
- 0x1000f801 0x0403c780 0x04001201 0xe4200782
- 0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0xd01d4805 0x20000780 0xd011800d 0x20000780
- 0x1400c001 0x0423c780 0x00000e09 0xc0000780
- 0xd8114805 0x20000780 0x2c40c005 0x04200780
- 0x3408c1fd 0x6420c7c8 0xa0060003 0x00000000
- 0x1500e000 0x3501e004 0x1005f003 0x00000280
- 0xa0004e09 0x04200780 0xa000440d 0x04200780
- 0x40438408 0x20028c08 0x3007040d 0xc4100780
- 0x30060409 0xc4100780 0x20000609 0x04008780
- 0x3002100d 0xc4100780 0x2102ec08 0x20028608
- 0x20008409 0x00000007 0xd00e0409 0x80c00780
- 0x10060003 0x00000780 0x1000f809 0x0403c780
- 0x3001d005 0xac200782 0x00021205 0xc0000780
- 0x04055201 0xe4208780 0x00000e05 0xc0000780
- 0xd4117005 0x20000780 0x307c0205 0x8c000780
- 0x3483c1fd 0x6c6147c8 0x10000209 0x0403c780
- 0x10001009 0x0403c280 0x300203fd 0x6c00c7c8
- 0xa0099003 0x00000000 0x1000f831 0x0403c780
- 0x10098003 0x00000280 0x20000429 0x04000780
- 0x20098409 0x00000003 0x2009940d 0x00000003
- 0x00070c05 0xc0000780 0x0002040d 0xc0000780
- 0x00020611 0xc0000780 0x2000002d 0x04004780
- 0xd4154809 0x20000780 0x1900e20c 0x1900e610
- 0x1900e008 0x1900e400 0x4d43e214 0x4d44e610
- 0x1800ca0d 0x0423c780 0x6c02c009 0x80214780
- 0x6c00c411 0x80210780 0x1900e800 0x4d43ea0c
- 0x20000409 0x04010780 0x6c00c801 0x8020c780
- 0x20008434 0x1900ee0c 0x1900ec10 0x1900f208
- 0x1900f014 0x1900f638 0x1900f400 0x4d43ee0c
- 0x00000e09 0xc0000780 0xd8115809 0x20000780
- 0x6c04cc0d 0x8020c780 0x1900e010 0x4d42f208
- 0x20001a0d 0x0400c780 0x6c05d009 0x80208780
- 0x4d4ef614 0x20028608 0x6c00d401 0x80214780
- 0x20000401 0x04000780 0x30040001 0xec000780
- 0x2040c001 0x04200784 0x301f0009 0xec100780
- 0x30010001 0xc4100780 0xd0000401 0x04008780
- 0x20209429 0x00000003 0x30840001 0xac400780
- 0x300b15fd 0x6c0047c8 0x20001831 0x04000780
- 0xdc01000d 0x20000780 0xd0010011 0x20000784
- 0x10074003 0x00000280 0x10099003 0x00000780
- 0x00070c05 0xc0000780 0x00021209 0xc0000782
- 0x08025201 0xe4230780 0xd809480d 0x20000780
- 0x1c00d001 0x0423c780 0x1c00f009 0x0423c780
- 0x2c00c001 0x04200780 0x2c00e009 0x04208780
- 0x20000001 0x04008780 0x08025201 0xe4200780
- 0x1d00e400 0x1d00ec08 0x2d00e000 0x2d02e808
- 0x20000001 0x04008780 0x08025201 0xe4200780
- 0x1d00e200 0x2d00e000 0x08025201 0xe4200780
- 0x00000e09 0xc0000780 0xd8117009 0x20000780
- 0x387cc1fd 0x6c2147c8 0xa00b3003 0x00000000
- 0x100b0003 0x00000280 0xd4094809 0x20000780
- 0x387cc1fd 0x6c2087c8 0x100b2003 0x00000280
- 0x10018001 0x00000003 0x100b3003 0x00000780
- 0x1000f801 0x0403c780 0x30851009 0x64410782
- 0xd4094805 0x20000780 0xa000040d 0x2c014780
- 0x30010211 0xec100780 0x20019009 0x00000003
- 0x407f860d 0x0007ffff 0x2440c011 0x04210780
- 0x60020205 0x8000c780 0x30080809 0xec000780
- 0x20000205 0x04008780 0x40030009 0x00000780
- 0x60020209 0x00008780 0x30100409 0xc4100780
- 0x60020001 0x00008780 0x00021209 0xc0000780
- 0x08025201 0xe4200780 0xd8094805 0x20000780
- 0x1500e800 0x1500f804 0x3400c001 0xac200780
- 0x3401d005 0xac200780 0x30010001 0xac000780
- 0x307c11fd 0x640147c8 0x08025201 0xe4200780
+ 0xd01d5009 0x20000780 0x3807c1fd 0x6c20c7c8
+ 0xa0046003 0x00000000 0x10046003 0x00000280
+ 0xd01d5809 0x20000780 0x2800c005 0x0421c780
+ 0x30020205 0xc4100780 0xd011a009 0x20000780
+ 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
+ 0x1800c005 0x0423c780 0x30010405 0xec000780
+ 0x00020e09 0xc0000780 0x08001201 0xe4204780
+ 0xd01d5009 0x20000782 0x2000d005 0x0421c780
+ 0x3801c1fd 0x6c20c7c8 0x00020209 0xc0000780
+ 0xa0055003 0x00000000 0x10055003 0x00000280
+ 0xd01d580d 0x20000780 0x2d07e004 0x2101f004
+ 0x30020205 0xc4100780 0xd011a00d 0x20000780
+ 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
+ 0x1c00c005 0x0423c780 0x30010405 0xec000780
+ 0x08001201 0xe4204780 0xf0000001 0xe0000002
+ 0x861ffe03 0x00000000 0xd01d5009 0x20000780
+ 0x2840c005 0x04200780 0x3001d005 0xac200780
+ 0x307c0205 0x8c000780 0x300603fd 0x6c00c7c8
+ 0xa0088003 0x00000000 0x1000f821 0x0403c780
+ 0x10088003 0x00000280 0x20000025 0x04018780
+ 0x20098c09 0x00000003 0x2009920d 0x00000003
+ 0x20000029 0x04004780 0x0002040d 0xc0000780
+ 0x00020611 0xc0000780 0x00070a09 0xc0000780
+ 0xd8154809 0x20000780 0x1900e204 0x1900e60c
+ 0x1900e000 0x1900e408 0x4d41e204 0x4d43e60c
+ 0x6c00c001 0x80204780 0x6c02c405 0x8020c780
+ 0x20018004 0x1900e800 0x6c00c805 0x80204780
+ 0x1800ca01 0x0423c780 0x6c00ca31 0x80204780
+ 0x1900ee2c 0x1900ec10 0x1900f004 0x1900f200
+ 0x1900f60c 0x1900f408 0x4c0bce2d 0x00218780
+ 0xd4115809 0x20000780 0x6c04cc2d 0x8022c780
+ 0x1900e010 0x200b982c 0x6c01d005 0x8022c780
+ 0x4c03d60d 0x00218780 0x6c00d201 0x80204780
+ 0x6c02d405 0x8020c780 0x20000001 0x04004780
+ 0x30040001 0xec000780 0x2040c001 0x04200784
+ 0x301f0005 0xec100780 0x30010001 0xc4100780
+ 0xd0000201 0x04008780 0x20209225 0x00000003
+ 0x30820001 0xac400780 0x300a13fd 0x6c0047c8
+ 0x20001021 0x04000780 0xdc01000d 0x20000780
+ 0xd0010011 0x20000784 0x10065003 0x00000280
+ 0x00020e05 0xc0000782 0x04025201 0xe4220780
+ 0xd4094809 0x20000780 0x1800d001 0x0423c780
+ 0x1800f005 0x0423c780 0x2800c001 0x04200780
+ 0x2800e005 0x04204780 0x20000001 0x04004780
+ 0x04025201 0xe4200780 0x1900e400 0x1900ec04
+ 0x2900e000 0x2901e804 0x20000001 0x04004780
+ 0x307c0dfd 0x640147c8 0x04025201 0xe4200780
0x30000003 0x00000280 0x10004401 0x0023c780
- 0x60004e01 0x00218780 0x00021209 0xc0000780
- 0xd8094805 0x20000780 0xa0004c09 0x04200780
- 0x3006000d 0xc4100780 0x1500e200 0x1500e604
- 0x20000409 0x0400c780 0x3400c00d 0xac200780
- 0x3401c405 0xac200780 0x30020401 0xc4100780
- 0x30010605 0xac000780 0x2000c801 0x04200780
- 0xd00e0005 0xa0c00781
+ 0x60004e05 0x00214780 0x00020e05 0xc0000780
+ 0xa0004c01 0x04200780 0x30060205 0xc4100780
+ 0xd4094805 0x20000780 0x20018004 0x1500e200
+ 0x30020205 0xc4100780 0x2500e008 0x2101e800
+ 0xd00e0009 0xa0c00781
}
}
code {
@@ -838,253 +809,196 @@ code {
code {
name = cudaEstimateResidual8
lmem = 0
- smem = 3748
+ smem = 3760
reg = 14
bar = 1
const {
segname = const
segnum = 1
offset = 0
- bytes = 28
+ bytes = 12
mem {
- 0x000003ff 0x0000000f 0x0000001f 0x00000020
- 0x00000001 0x007fffff 0x0000000e
+ 0x000003ff 0x0000000f 0x007fffff
}
}
bincode {
- 0xd0800209 0x00400780 0xa0000001 0x04000780
- 0xa0000421 0x04000780 0x308101fd 0x644107c8
- 0xa0012003 0x00000000 0x30061009 0xc4100780
- 0x10012003 0x00000280 0x10004409 0x0023c780
- 0x60024e05 0x00220780 0x3007020d 0xc4100780
- 0x30060205 0xc4100780 0x2000060d 0x04004780
- 0x30020005 0xc4100780 0x2103ec10 0x2001840c
- 0x20000205 0x04010780 0xd00e0205 0x80c00780
- 0x00000605 0xc0000780 0x04045201 0xe4204780
+ 0xd0800205 0x00400780 0xa0000019 0x04000780
+ 0xa0000215 0x04000780 0x30810dfd 0x644107c8
+ 0xa0013003 0x00000000 0x30060a01 0xc4100780
+ 0x10013003 0x00000280 0xa0004e05 0x04200780
+ 0xa0004409 0x04200780 0x40428204 0x20018a04
+ 0x30070209 0xc4100780 0x30060205 0xc4100780
+ 0x20000409 0x04004780 0x30020c05 0xc4100780
+ 0x2102ec0c 0x20018008 0x20000205 0x0400c780
+ 0xd00e0205 0x80c00780 0x00000405 0xc0000780
+ 0x04045201 0xe4204780 0x00000005 0xc0000782
+ 0xd4114809 0x20000780 0x3806c1fd 0x6420c7c8
+ 0xa0025003 0x00000000 0x1800c001 0x0423c780
+ 0x10024003 0x00000280 0xa0004e05 0x04200780
+ 0xa0004409 0x04200780 0x40428204 0x20018a04
+ 0x30070209 0xc4100780 0x30060205 0xc4100780
+ 0x20000405 0x04004780 0x30020c09 0xc4100780
+ 0x2101ec04 0x20018404 0x20008205 0x00000007
+ 0xd00e0205 0x80c00780 0x10025003 0x00000780
+ 0x1000f805 0x0403c780 0x30050a09 0xc4100782
+ 0x2000041d 0x040187c0 0x00020e09 0xc0000780
+ 0x08055201 0xe4204780 0xa0036003 0x00000000
+ 0x10036003 0x00000280 0xa0004c05 0x04200780
+ 0x4001d005 0x00218780 0x00075201 0xe4204780
+ 0xd01d480d 0x20000780 0xd0115009 0x20000780
+ 0x1100f004 0x1d00e008 0x2101ee0c 0x2942ec04
+ 0x30030205 0xac000780 0x00075401 0xe4204780
+ 0x1d00e004 0x2901e004 0x00075601 0xe4204780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0xa0004c15 0x04200780 0x1000d005 0x0423c780
- 0x400a060d 0x00000780 0x3010060d 0xc4100780
- 0x600a040d 0x0000c780 0x1000d005 0x0423c780
- 0xd0118005 0x20000780 0x2101ee04 0x2543e010
- 0x30051019 0xc4100780 0x30040211 0xac000780
- 0x20000c25 0x04000780 0x300909fd 0x6c00c7c8
- 0xa002b003 0x00000000 0x1002a003 0x00000280
- 0xd0115005 0x20000780 0x2503e004 0x20019204
- 0x30020205 0xc4100780 0x2000ca05 0x04204780
- 0xd00e0219 0x80c00780 0x1400d405 0x0423c780
- 0x30010c05 0xec000780 0x1002b003 0x00000780
- 0x1000f805 0x0403c780 0x308213fd 0x6c4107ca
- 0x0002120d 0xc0000780 0x0c001201 0xe4204780
- 0xa0040003 0x00000000 0x10040003 0x00000280
- 0x2000d005 0x04224780 0x300403fd 0x6c0187c8
- 0x00020205 0xc0000780 0xa003f003 0x00000000
- 0x1003e003 0x00000280 0xd0115009 0x20000780
- 0x2109f010 0x2903e004 0x20000205 0x04010780
- 0x30020205 0xc4100780 0x2000ca05 0x04204780
- 0xd00e0211 0x80c00780 0x1800d405 0x0423c780
- 0x30010805 0xec000780 0x1003f003 0x00000780
- 0x1000f805 0x0403c780 0x04001201 0xe4204782
- 0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0x00000409 0xc0000780 0x0002120d 0xc0000780
- 0x0c025201 0xe43f0780 0xd8114805 0x20000780
- 0x3400c1fd 0x6420c7c8 0xa0054003 0x00000000
- 0x10053003 0x00000280 0x10004409 0x0023c780
- 0x60024e05 0x00220780 0x30070209 0xc4100780
- 0x30060205 0xc4100780 0x20000409 0x04004780
- 0x30020005 0xc4100780 0x2102ec08 0x20028204
- 0x20008205 0x00000007 0xd00e0205 0x80c00780
- 0x10054003 0x00000780 0x1000f805 0x0403c780
- 0x0002120d 0xc0000782 0x0c055201 0xe4204780
- 0xd0118005 0x20000780 0xd811480d 0x20000780
- 0x2440c009 0x0420c780 0x3c84cbfd 0x6c6147c8
- 0x60864405 0x00600780 0x3c00c011 0x04208780
- 0x1000020d 0x0403c500 0x1000000d 0x0403c280
- 0x300107fd 0x640187c8 0xa008f003 0x00000000
- 0x10000609 0x0403c780 0x1000f829 0x0403c780
- 0x1008c003 0x00000280 0x20098619 0x00000003
- 0x3004d00d 0xac200780 0x00071005 0xc0000780
- 0x00020c0d 0xc0000780 0x307c0619 0x8c000780
- 0xd4154811 0x20000780 0x1000c21d 0x0423c784
- 0x1000c62d 0x0423c784 0x1000c011 0x0423c784
- 0x1000c40d 0x0423c784 0x4d47e21c 0x4d4be62c
- 0x6c04c031 0x8021c780 0x6c03c435 0x8022c780
- 0x1000ca2d 0x0423c784 0x1000c81d 0x0423c784
- 0x1000ce11 0x0423c784 0x1000cc0d 0x0423c784
- 0x4c0bca2d 0x00218780 0xd8114811 0x20000780
- 0x20001831 0x04034780 0x6c07c835 0x8022c780
- 0x2000c02d 0x04208784 0x1000c41d 0x0423c784
- 0x4d44ee10 0x200d9830 0x6c03cc0d 0x80210780
- 0x2000180d 0x0400c780 0x00021611 0xc0000780
- 0x3007060d 0xec000780 0x2040d20d 0x0420c784
- 0x301f0611 0xec100780 0x3001061d 0xc4100780
- 0x30020c0d 0x6c010780 0xd0070811 0x04008780
- 0x20208409 0x00000003 0xa000060d 0x2c014780
- 0x30850811 0xac400780 0x300105fd 0x640047c8
- 0x60040629 0x80028780 0xdc01000d 0x20000780
- 0x10068003 0x00000280 0x1008f003 0x00000780
- 0x3004d005 0xac200780 0x00071005 0xc0000780
- 0x307c0219 0x8c000780 0x0002120d 0xc0000782
- 0x0c025201 0xe4228780 0xdc094811 0x20000780
- 0x1000d005 0x0423c784 0x1000f009 0x0423c784
- 0x2000c005 0x04204784 0x2000e009 0x04208784
- 0x20000205 0x04008780 0x0c025201 0xe4204780
- 0x1000c405 0x0423c784 0x1000cc09 0x0423c784
- 0x2000c005 0x04204784 0x2000c809 0x04208784
- 0x20000205 0x04008780 0x0c025201 0xe4204780
- 0x1000c205 0x0423c784 0x2000c005 0x04204784
- 0x0c025201 0xe4204780 0xd8117009 0x20000780
- 0x387cc1fd 0x6c2147c8 0xa00ab003 0x00000000
- 0x100a8003 0x00000280 0xd4094809 0x20000780
- 0x387cc1fd 0x6c2087c8 0x100aa003 0x00000280
- 0x10018005 0x00000003 0x100ab003 0x00000780
- 0x1000f805 0x0403c780 0xf0000001 0xe0000002
- 0x20018009 0x00000003 0x4005180d 0x00000780
- 0x60041a0d 0x0000c780 0x3086001d 0x64410780
- 0xd4094805 0x20000780 0x30100611 0xc4100780
- 0xa0000e1d 0x2c014780 0x30010c0d 0xec100780
- 0x60041811 0x00010780 0x407f8e19 0x0007ffff
- 0x2543e008 0x2006880c 0x30000409 0xec000780
- 0x20000409 0x0400c780 0x4005040d 0x00000780
- 0x6004060d 0x0000c780 0x3010060d 0xc4100780
- 0x60040405 0x0000c780 0x00021209 0xc0000780
- 0x08025201 0xe4204780 0xd8094805 0x20000780
- 0x1500e804 0x1500f808 0x3401c005 0xac200780
- 0x3402d009 0xac200780 0x30020205 0xac000780
- 0x307c01fd 0x640147c8 0x08025201 0xe4204780
+ 0xd01d5009 0x20000780 0x3807c1fd 0x6c20c7c8
+ 0xa0046003 0x00000000 0x10046003 0x00000280
+ 0xd01d5809 0x20000780 0x2800c005 0x0421c780
+ 0x30020205 0xc4100780 0xd011a009 0x20000780
+ 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
+ 0x1800c005 0x0423c780 0x30010405 0xec000780
+ 0x00020e09 0xc0000780 0x08001201 0xe4204780
+ 0xd01d5009 0x20000782 0x2000d005 0x0421c780
+ 0x3801c1fd 0x6c20c7c8 0x00020209 0xc0000780
+ 0xa0055003 0x00000000 0x10055003 0x00000280
+ 0xd01d580d 0x20000780 0x2d07e004 0x2101f004
+ 0x30020205 0xc4100780 0xd011a00d 0x20000780
+ 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
+ 0x1c00c005 0x0423c780 0x30010405 0xec000780
+ 0x08001201 0xe4204780 0xf0000001 0xe0000002
+ 0x861ffe03 0x00000000 0xd01d5009 0x20000780
+ 0x2840c005 0x04200780 0x3001d005 0xac200780
+ 0x307c0205 0x8c000780 0x300603fd 0x6c00c7c8
+ 0xa0082003 0x00000000 0x1000f821 0x0403c780
+ 0x10082003 0x00000280 0x20000025 0x04018780
+ 0x20098c09 0x00000003 0x2009920d 0x00000003
+ 0x20000029 0x04004780 0x0002040d 0xc0000780
+ 0x00020611 0xc0000780 0x00070a09 0xc0000780
+ 0xd8154809 0x20000780 0x1900e204 0x1900e000
+ 0x4c01c205 0x00218780 0x6c00c031 0x80204780
+ 0x1900e62c 0x1900e410 0x1900ea04 0x1900e800
+ 0x1900ee0c 0x1900ec08 0x4c0bc62d 0x00218780
+ 0xd4115809 0x20000780 0x6c04c42d 0x8022c780
+ 0x1900e010 0x4d41ea34 0x20001805 0x0402c780
+ 0x6c00c801 0x80234780 0x4d43ee0c 0x20008200
+ 0x6c02cc05 0x8020c780 0x20000001 0x04004780
+ 0x30040001 0xec000780 0x2040c001 0x04200784
+ 0x301f0005 0xec100780 0x30010001 0xc4100780
+ 0xd0000201 0x04008780 0x20209225 0x00000003
+ 0x30820001 0xac400780 0x300a13fd 0x6c0047c8
+ 0x20001021 0x04000780 0xdc01000d 0x20000780
+ 0xd0010011 0x20000784 0x10065003 0x00000280
+ 0x00020e05 0xc0000782 0x04025201 0xe4220780
+ 0xd4094809 0x20000780 0x1800d001 0x0423c780
+ 0x1800f005 0x0423c780 0x2800c001 0x04200780
+ 0x2800e005 0x04204780 0x20000001 0x04004780
+ 0x04025201 0xe4200780 0x1900e400 0x1900ec04
+ 0x2900e000 0x2901e804 0x20000001 0x04004780
+ 0x307c0dfd 0x640147c8 0x04025201 0xe4200780
0x30000003 0x00000280 0x10004401 0x0023c780
- 0x60004e01 0x00220780 0x0002120d 0xc0000780
- 0xdc094805 0x20000780 0x30060009 0xc4100780
- 0x1500e200 0x1500e604 0x20000a09 0x04008780
- 0x3400c00d 0xac200780 0x3401c405 0xac200780
- 0x30020401 0xc4100780 0x30010605 0xac000780
- 0x2000c801 0x04200780 0xd00e0005 0xa0c00781
+ 0x60004e05 0x00214780 0x00020e05 0xc0000780
+ 0xa0004c01 0x04200780 0x30060205 0xc4100780
+ 0xd4094805 0x20000780 0x20018004 0x1500e200
+ 0x30020205 0xc4100780 0x2500e008 0x2101e800
+ 0xd00e0009 0xa0c00781
}
}
code {
name = cudaEstimateResidual
lmem = 0
smem = 3748
- reg = 13
+ reg = 12
bar = 1
const {
segname = const
segnum = 1
offset = 0
- bytes = 24
+ bytes = 20
mem {
0x000003ff 0x0000000f 0x0000001f 0x00000001
- 0x007fffff 0x0000000e
+ 0x007fffff
}
}
bincode {
0xd0800205 0x00400780 0xa0000019 0x04000780
- 0xa0000215 0x04000780 0x30810dfd 0x644107c8
+ 0xa0000211 0x04000780 0x30810dfd 0x644107c8
0xa0015003 0x00000000 0xa0004405 0x04200780
- 0x30060a11 0xc4100780 0x10015003 0x00000280
+ 0x30060815 0xc4100780 0x10015003 0x00000280
0x40034e01 0x00200780 0x30100001 0xc4100780
- 0x60024e01 0x00200780 0x20000001 0x04014780
+ 0x60024e01 0x00200780 0x20000001 0x04010780
0x30070009 0xc4100780 0x30060001 0xc4100780
- 0x20000401 0x04000780 0x30020c0d 0xc4100780
- 0x2100ec00 0x20038808 0x20000601 0x04000780
+ 0x20000409 0x04000780 0x30020c01 0xc4100780
+ 0x2102ec0c 0x20008a08 0x20000001 0x0400c780
0xd00e0001 0x80c00780 0x00000405 0xc0000780
0x04045201 0xe4200780 0xf0000001 0xe0000002
0x861ffe03 0x00000000 0xa0004c09 0x04200780
0x1000d001 0x0423c780 0x4004020d 0x00000780
0x3010060d 0xc4100780 0x6004000d 0x0000c780
0x1000d001 0x0423c780 0xd0118005 0x20000780
- 0x2100ee20 0x2543e01c 0x30050a01 0xc4100780
- 0x3007101d 0xac000780 0x20000021 0x04018780
+ 0x2100ee00 0x2543e01c 0x30050821 0xc4100780
+ 0x3007001d 0xac000780 0x20001021 0x04018780
0x30080ffd 0x6c00c7c8 0xa002e003 0x00000000
0x1002d003 0x00000280 0xd0115005 0x20000780
0x2503e000 0x20009000 0x30020001 0xc4100780
0x2000ca01 0x04200780 0xd00e0025 0x80c00780
0x1400d401 0x0423c780 0x30001201 0xec000780
0x1002e003 0x00000780 0x1000f801 0x0403c780
- 0x00021009 0xc0000782 0x308211fd 0x6c4107c8
- 0x08001201 0xe4200780 0xa0043003 0x00000000
+ 0x00021005 0xc0000782 0x308211fd 0x6c4107c8
+ 0x04001201 0xe4200780 0xa0043003 0x00000000
0x10043003 0x00000280 0x2000d001 0x04220780
- 0x300701fd 0x6c0187c8 0x00020005 0xc0000780
+ 0x300701fd 0x6c0187c8 0x00020009 0xc0000780
0xa0042003 0x00000000 0x10041003 0x00000280
- 0xd011500d 0x20000780 0x2108f000 0x2d03e020
- 0x20001001 0x04000780 0x30020001 0xc4100780
- 0x2000ca01 0x04200780 0xd00e0021 0x80c00780
- 0x1c00d401 0x0423c780 0x30001001 0xec000780
+ 0xd011500d 0x20000780 0x2108f020 0x2d03e000
+ 0x20000001 0x04020780 0x30020001 0xc4100780
+ 0x2000ca01 0x04200780 0xd00e000d 0x80c00780
+ 0x1c00d401 0x0423c780 0x30000601 0xec000780
0x10042003 0x00000780 0x1000f801 0x0403c780
- 0x04001201 0xe4200782 0xf0000001 0xe0000002
- 0x861ffe03 0x00000000 0x08025201 0xe43f0780
- 0x00000805 0xc0000780 0xd4114805 0x20000780
- 0x3406c1fd 0x6420c7c8 0xa0058003 0x00000000
+ 0x08001201 0xe4200782 0xf0000001 0xe0000002
+ 0x861ffe03 0x00000000 0x04025201 0xe43f0780
+ 0x00000a09 0xc0000780 0xd8114809 0x20000780
+ 0x3806c1fd 0x6420c7c8 0xa0058003 0x00000000
0x10057003 0x00000280 0x40034e01 0x00200780
0x30100001 0xc4100780 0x60024e01 0x00200780
- 0x20000001 0x04014780 0x30070021 0xc4100780
- 0x30060001 0xc4100780 0x20001001 0x04000780
- 0x30020c21 0xc4100780 0x2100ec00 0x20009000
+ 0x20000001 0x04010780 0x3007000d 0xc4100780
+ 0x30060001 0xc4100780 0x2000060d 0x04000780
+ 0x30020c01 0xc4100780 0x2103ec0c 0x20038000
0x20008001 0x00000007 0xd00e0001 0x80c00780
0x10058003 0x00000780 0x1000f801 0x0403c780
- 0x08055201 0xe4200782 0x00000805 0xc0000780
- 0xd411480d 0x20000780 0x3c83ca01 0x6c608780
- 0xd0118005 0x20000780 0xd000022d 0x04000780
- 0x2440c001 0x0420c780 0x300117fd 0x640187c8
- 0xa0093003 0x00000000 0x3c00c00d 0x04200780
- 0x1008f003 0x00000280 0x30070a01 0xc4100780
- 0x20248001 0x00000003 0x00000005 0xc0000780
- 0x04000031 0x40000780 0x00000805 0xc0000780
- 0xd4114805 0x20000780 0x30051601 0xc4100780
- 0x3500e021 0x00000003 0x20000c01 0x04000780
- 0x307c11fd 0x6c0187c8 0xa007b003 0x00000000
- 0x1000f825 0x0403c780 0x2400c021 0x04200780
- 0x1007b003 0x00000280 0x20098029 0x00000003
- 0x0002140d 0xc0000780 0x00001805 0xc0000780
- 0xd4150011 0x20000780 0x1000c029 0x0423c784
- 0x20018001 0x00000003 0x6e0ac225 0x80224780
- 0x300801fd 0x6c0147c8 0xd4000805 0x20000780
- 0x10074003 0x00000280 0x00000805 0xc0000782
- 0xd4115805 0x20000780 0x1400c001 0x0423c780
- 0x30001201 0xec000780 0x00021005 0xc0000780
- 0x2440d225 0x04200780 0x301f1201 0xec100780
- 0x30011225 0xc4100780 0x30080e21 0x6c010780
- 0xd8094805 0x20000780 0xd0090025 0x04008780
- 0xa0001021 0x2c014780 0x1400c001 0x0423c780
- 0x30841225 0xac400780 0x2001962d 0x00000003
- 0x60091001 0x80000780 0x300117fd 0x640147c8
- 0x08025201 0xe4200780 0x10067003 0x00000280
- 0x10093003 0x00000780 0x30070a01 0xc4100780
- 0x20248001 0x00000003 0x00000005 0xc0000780
- 0x04000031 0x40000780 0xd8094805 0x20000782
- 0x1400d01d 0x0423c780 0x1400f001 0x0423c780
- 0x2400c01d 0x0421c780 0x2400e001 0x04200780
- 0x20000e01 0x04000780 0x08025201 0xe4200780
- 0x1500e41c 0x1500ec00 0x2507e01c 0x2500e800
- 0x20000e01 0x04000780 0x08025201 0xe4200780
- 0x1500e200 0x2500e000 0x08025201 0xe4200780
- 0x00000805 0xc0000780 0xd4117005 0x20000780
- 0x347cc1fd 0x6c2147c8 0xa00ac003 0x00000000
- 0x100a9003 0x00000280 0x00001805 0xc0000780
- 0xd4090005 0x20000780 0x347cc1fd 0x6c2087c8
- 0x100ab003 0x00000280 0x10018001 0x00000003
- 0x100ac003 0x00000780 0x1000f801 0x0403c780
- 0x3003d00d 0xac200782 0x20018c11 0x00000003
- 0x307c060d 0x8c000780 0x40090c1d 0x00000780
- 0x60080e21 0x0001c780 0x30850c1d 0x64410780
- 0x00001805 0xc0000780 0xd4090005 0x20000780
- 0x30101021 0xc4100780 0xa0000e1d 0x2c014780
- 0x30010625 0xec100780 0x60080c11 0x00020780
- 0x407f8e0d 0x0007ffff 0x2549e01c 0x2003880c
- 0x30060e11 0xec000780 0x2000080d 0x0400c780
- 0x40070011 0x00000780 0x60060211 0x00010780
- 0x30100811 0xc4100780 0x60060001 0x00010780
- 0x08025201 0xe4200780 0xd8094805 0x20000780
- 0x1500e80c 0x1500f800 0x3403c00d 0xac200780
- 0x3400d001 0xac200780 0x30000601 0xac000780
- 0x307c0dfd 0x640147c8 0x08025201 0xe4200780
- 0x30000003 0x00000280 0x40034e01 0x00200780
- 0x30100001 0xc4100780 0x60024e01 0x00200780
- 0x20000001 0x04014780 0xd8094805 0x20000780
- 0x3006000d 0xc4100780 0x1500e204 0x1500e600
- 0x20000409 0x0400c780 0x3401c005 0xac200780
- 0x3400c401 0xac200780 0x30020409 0xc4100780
- 0x30000201 0xac000780 0x2000c805 0x04208780
- 0xd00e0201 0xa0c00781
+ 0x04055201 0xe4200782 0x00000a09 0xc0000780
+ 0xd8117009 0x20000780 0x3883c001 0x6c608780
+ 0xd0000221 0x04000780 0x300111fd 0x640187c8
+ 0xa0089003 0x00000000 0x10089003 0x00000280
+ 0x00000a09 0xc0000780 0xd8114809 0x20000780
+ 0x30051001 0xc4100780 0x3900e00d 0x00000003
+ 0x20000c01 0x04000780 0x307c07fd 0x6c0187c8
+ 0xa0076003 0x00000000 0x1000f829 0x0403c780
+ 0x2800c025 0x04200780 0x10076003 0x00000280
+ 0x3007082d 0xc4100780 0x2009800d 0x00000003
+ 0x2024962d 0x00000003 0x00020609 0xc0000780
+ 0x0000160d 0xc0000780 0xdc150011 0x20000780
+ 0x1000c00d 0x0423c784 0x20018001 0x00000003
+ 0x6a03c229 0x80228780 0x300901fd 0x6c0147c8
+ 0xdc00080d 0x20000780 0x1006f003 0x00000280
+ 0x00000a09 0xc0000782 0xd8115809 0x20000780
+ 0x1800c001 0x0423c780 0x30001401 0xec000780
+ 0x00021209 0xc0000780 0x2840d20d 0x04200780
+ 0x301f0601 0xec100780 0x30010629 0xc4100780
+ 0x30090e0d 0x6c010780 0xd4094809 0x20000780
+ 0xd00a0025 0x04008780 0xa000060d 0x2c014780
+ 0x1800c001 0x0423c780 0x30841225 0xac400780
+ 0x20019021 0x00000003 0x60090601 0x80000780
+ 0x300111fd 0x640147c8 0x04025201 0xe4200780
+ 0x10060003 0x00000280 0xd4094809 0x20000782
+ 0x1800d001 0x0423c780 0x1800f00d 0x0423c780
+ 0x2800c001 0x04200780 0x2800e00d 0x0420c780
+ 0x20000001 0x0400c780 0x04025201 0xe4200780
+ 0x1900e400 0x1900ec0c 0x2900e000 0x2903e80c
+ 0x20000001 0x0400c780 0x307c0dfd 0x640147c8
+ 0x04025201 0xe4200780 0x30000003 0x00000280
+ 0x40034e01 0x00200780 0x30100001 0xc4100780
+ 0x60024e01 0x00200780 0x20000001 0x04010780
+ 0x30060001 0xc4100780 0xd4094805 0x20000780
+ 0x20008404 0x1500e200 0x30020205 0xc4100780
+ 0x2500e008 0x2101e800 0xd00e0009 0xa0c00781
}
}
code {
@@ -1187,158 +1101,173 @@ code {
code {
name = cudaChooseBestMethod
lmem = 0
- smem = 4128
- reg = 13
+ smem = 4132
+ reg = 14
bar = 1
const {
segname = const
segnum = 1
offset = 0
- bytes = 28
+ bytes = 48
mem {
- 0x000003ff 0x00000008 0x00000001 0x00000020
- 0x0000007f 0x0000003f 0x0000001f
+ 0x000003ff 0x7e800000 0x3f000000 0x0000000e
+ 0x00000008 0x00000001 0x00000020 0x0000007f
+ 0x0000003f 0x0000001f 0x00000024 0x3e800000
}
}
bincode {
0xd0800205 0x00400780 0xa000020d 0x04000780
0xa0000005 0x04000780 0x30050601 0xc4100780
0x20000211 0x04000780 0x103f8001 0x07ffffff
- 0x00020805 0xc0000780 0x307ccffd 0x6c20c7c8
- 0x04011001 0xe4200780 0x00070609 0xc0000780
- 0x10096003 0x00000280 0xa0004415 0x04200780
+ 0x00020805 0xc0000780 0x307cd1fd 0x6c20c7c8
+ 0x04011201 0xe4200780 0x00070609 0xc0000780
+ 0x100b3003 0x00000280 0xa0004415 0x04200780
0x1000f819 0x0403c780 0x20000c1d 0x0400c780
- 0x3007cffd 0x6420c7c8 0xa0093003 0x00000000
- 0x10093003 0x00000280 0x1000ce01 0x0423c780
+ 0x3007d1fd 0x6420c7c8 0xa00b0003 0x00000000
+ 0x100b0003 0x00000280 0x1000d001 0x0423c780
0x40014e09 0x00200780 0x30100409 0xc4100780
- 0x60004e21 0x00208780 0x30070601 0xc4100780
+ 0x60004e09 0x00208780 0x30070601 0xc4100780
0x30070c2d 0xc4100780 0x30060c31 0xc4100780
- 0x30060629 0xc4100780 0x30071025 0xc4100780
- 0x30061009 0xc4100780 0x200c962c 0x200a8000
- 0x20029224 0x210be828 0x30020209 0xc4100780
- 0x200a802c 0x20098428 0x20028024 0x200b9408
- 0xd00e0409 0x80c00780 0x00000005 0xc0000780
- 0x0000120d 0xc0000780 0x307ccdfd 0x6c20c7c8
- 0x0c051001 0xe4208780 0x1000f809 0x0403c780
- 0x10038003 0x00000280 0xa0037003 0x00000000
- 0x10008200 0x2101ec24 0xa0004229 0x04200780
- 0x3000cdfd 0x6420c7c8 0xa0034003 0x00000000
- 0x10034003 0x00000280 0x20000e2d 0x04020780
- 0x3006162d 0xc4100780 0x2000002d 0x0402c780
- 0x3002162d 0xc4100780 0x2000ca2d 0x0422c780
- 0xd00e162d 0x80c00780 0x20001609 0x04008780
- 0x20000001 0x04028782 0x300901fd 0x6c0047c8
- 0x1002a003 0x00000280 0xf0000001 0xe0000002
- 0x0002080d 0xc0000780 0x0c031001 0xe4208780
- 0xdc0c4011 0x20000780 0x1000e001 0x0423c784
- 0x2000c001 0x04200784 0x0c031001 0xe4200780
- 0x1000d001 0x0423c784 0x2000c001 0x04200784
- 0x0c031001 0xe4200780 0x1000c801 0x0423c784
- 0x2000c001 0x04200784 0x0c031001 0xe4200780
- 0x1000c401 0x0423c784 0x2000c001 0x04200784
- 0x0c031001 0xe4200780 0x1000c201 0x0423c784
- 0x2000c001 0x04200784 0x307c03fd 0x640147c8
- 0x0c031001 0xe4200780 0x10093003 0x00000280
- 0xd414680d 0x20000780 0x1d00ec08 0x1d00e400
- 0x2c40c209 0x04208780 0x40050021 0x00000780
- 0x60040221 0x00020780 0x30101021 0xc4100780
- 0x3c81c1fd 0x6c6147c8 0x60040021 0x00020780
- 0xa0090003 0x00000000 0x10066003 0x00000280
- 0xd4144005 0x20000780 0x1400c001 0x0423c780
- 0x3002cc25 0xc4300780 0x4005002d 0x00000780
- 0x301f1229 0xec100780 0x6004022d 0x0002c780
- 0xd0821429 0x04400780 0x3010162d 0xc4100780
- 0x20001425 0x04024780 0x60040009 0x0002c780
- 0x30011201 0xec100780 0x20000401 0x04000780
- 0xd80c4005 0x20000780 0x2400c001 0x04200780
- 0x20068001 0x00000003 0x10090003 0x00000780
- 0xd414680d 0x20000780 0x3c83c1fd 0x6c6147c8
- 0xa008f003 0x00000000 0x1007a003 0x00000280
- 0xd4144005 0x20000780 0x2502e608 0x1500e000
- 0x3002cc25 0xc4300780 0x40050029 0x00000780
- 0x301f122d 0xec100780 0x60040229 0x00028780
- 0xd082162d 0x04400780 0x30101429 0xc4100780
- 0x20001625 0x04024780 0x60040001 0x00028780
- 0x30011209 0xec100780 0x20000001 0x04008780
- 0xd80c4005 0x20000780 0x2400c001 0x04200780
- 0x200f8001 0x00000003 0x1008f003 0x00000780
- 0xd414680d 0x20000780 0x3c7cc1fd 0x6c2147c8
- 0xa008e003 0x00000000 0x10088003 0x00000280
- 0xd80c400d 0x20000780 0xd4147805 0x20000780
- 0x3c7cc1fd 0x6c2087c8 0x2501e001 0x00000003
- 0x10000401 0x2440c280 0x40050025 0x00000780
- 0x60040225 0x00024780 0x30101225 0xc4100780
- 0x60040001 0x00024780 0x1008e003 0x00000780
- 0xd4147805 0x20000780 0x1400c001 0x0423c780
+ 0x30060629 0xc4100780 0x30070425 0xc4100780
+ 0x30060421 0xc4100780 0x200c962c 0x200a8000
+ 0x20089224 0x210be828 0x30020221 0xc4100780
+ 0x200a802c 0x20099028 0x20088024 0x200b9420
+ 0xd00e1021 0x80c00780 0x00000005 0xc0000780
+ 0x0000120d 0xc0000780 0x0c051201 0xe4220780
+ 0x3001cffd 0x6c20c7c8 0xa0055003 0x00000000
+ 0x10000201 0x0403c780 0x1000f821 0x0403c780
+ 0x10055003 0x00000280 0x30060c25 0xc4100780
+ 0x30060629 0xc4100780 0x20001225 0x04028780
+ 0x30060409 0xc4100780 0x20000425 0x04024780
+ 0xa0004229 0x04200780 0xd414480d 0x20000780
+ 0x1c00ce09 0x0423c780 0x6000cc09 0x88208780
+ 0x3c7ccbfd 0x6c2147c8 0x3d02e02c 0x20009208
+ 0x30020409 0xc4100780 0x2000ca09 0x04208780
+ 0xd00e0409 0x80c00780 0xa003e003 0x00000000
+ 0x1003b003 0x00000280 0x307c05fd 0x6c0087c8
+ 0x1003d003 0x00000280 0x10018031 0x00000003
+ 0x1003e003 0x00000780 0x1000f831 0x0403c780
+ 0x300bcc2d 0xac200782 0x307c162d 0x8c000780
+ 0x400c162d 0x00018780 0xa0001631 0x44014780
+ 0xb03d1831 0x0358637b 0xa0000435 0x44014780
+ 0xb08119fd 0x605107c8 0xb03d1a35 0x0358637b
+ 0xc08b1a35 0x00400680 0xc08b1831 0x00400680
+ 0x90001831 0x00000780 0xe10c1a31 0x00408780
+ 0x90001831 0x60000780 0xa0001831 0x8c064780
+ 0x30831831 0xac400780 0x307c1831 0x8c000780
+ 0x300c0409 0xec000780 0x20019831 0x00000003
+ 0x20000001 0x04028780 0x600c1609 0x80008780
+ 0x3000cffd 0x6c2107c8 0x20001021 0x04008780
+ 0x1002f003 0x00000280 0x0002080d 0xc0000782
+ 0x0c031201 0xe4220780 0xdc0c4811 0x20000780
+ 0x1000e001 0x0423c784 0x2000c001 0x04200784
+ 0x0c031201 0xe4200780 0x1000d001 0x0423c784
+ 0x2000c001 0x04200784 0x0c031201 0xe4200780
+ 0x1000c801 0x0423c784 0x2000c001 0x04200784
+ 0x0c031201 0xe4200780 0x1000c401 0x0423c784
+ 0x2000c001 0x04200784 0x0c031201 0xe4200780
+ 0x1000c201 0x0423c784 0x2000c001 0x04200784
+ 0x307c03fd 0x640147c8 0x0c031201 0xe4200780
+ 0x100b0003 0x00000280 0xd414700d 0x20000780
+ 0x1d00ec08 0x1d00e400 0x2c40c209 0x04208780
+ 0x40050021 0x00000780 0x60040221 0x00020780
+ 0x30101021 0xc4100780 0x3c84c1fd 0x6c6147c8
+ 0x60040021 0x00020780 0xa00ad003 0x00000000
+ 0x10083003 0x00000280 0xd4144805 0x20000780
+ 0x1400c001 0x0423c780 0x3002ce25 0xc4300780
+ 0x4005002d 0x00000780 0x301f1229 0xec100780
+ 0x6004022d 0x0002c780 0xd0851429 0x04400780
+ 0x3010162d 0xc4100780 0x20001425 0x04024780
+ 0x60040009 0x0002c780 0x30011201 0xec100780
+ 0x20000401 0x04000780 0xd80c4805 0x20000780
+ 0x2400c001 0x04200780 0x20068001 0x00000003
+ 0x100ad003 0x00000780 0xd414700d 0x20000780
+ 0x3c86c1fd 0x6c6147c8 0xa00ac003 0x00000000
+ 0x10097003 0x00000280 0xd4144805 0x20000780
+ 0x2502e608 0x1500e000 0x3002ce25 0xc4300780
+ 0x40050029 0x00000780 0x301f122d 0xec100780
+ 0x60040229 0x00028780 0xd085162d 0x04400780
+ 0x30101429 0xc4100780 0x20001625 0x04024780
+ 0x60040001 0x00028780 0x30011209 0xec100780
+ 0x20000001 0x04008780 0xd80c4805 0x20000780
+ 0x2400c001 0x04200780 0x200f8001 0x00000003
+ 0x100ac003 0x00000780 0xd414700d 0x20000780
+ 0x3c7cc1fd 0x6c2147c8 0xa00ab003 0x00000000
+ 0x100a5003 0x00000280 0xd80c480d 0x20000780
+ 0xd4148005 0x20000780 0x3c7cc1fd 0x6c2087c8
+ 0x2501e001 0x00000003 0x10000a01 0x2440c280
0x40050025 0x00000780 0x60040225 0x00024780
0x30101225 0xc4100780 0x60040001 0x00024780
- 0xf0000001 0xe0000002 0xf0000001 0xe0000002
- 0x30080001 0xac000782 0x00020e05 0xc0000780
- 0x04011001 0xe4200780 0x20000c19 0x04014782
- 0x3006cffd 0x6c2107c8 0x1000d003 0x00000280
- 0x861ffe03 0x00000000 0x3004cffd 0x6c20c7c8
- 0xa00a7003 0x00000000 0x100a7003 0x00000280
- 0x1000ce01 0x0423c780 0x40014e05 0x00200780
- 0x30100205 0xc4100780 0x60004e01 0x00204780
- 0x20000001 0x04010780 0x30070005 0xc4100780
- 0x30060001 0xc4100780 0x00020805 0xc0000780
- 0x20000201 0x04000780 0xd4044005 0x20000780
- 0x2100e804 0x1500e000 0x20108205 0x00000003
- 0xd00e0201 0xa0c00780 0xf0000001 0xe0000002
- 0x861ffe03 0x00000000 0x00020805 0xc0000780
- 0xd4044005 0x20000780 0x308409fd 0x6c4107c8
- 0xa00ba003 0x00000000 0x1400c001 0x0423c780
- 0x100ba003 0x00000280 0x00020805 0xc0000780
- 0xd408400d 0x20000780 0xd4044009 0x20000780
- 0x1c00c001 0x0423c780 0x3800c1fd 0x6c2107c8
- 0x1c00c001 0x0423c780 0x20008805 0x0000000b
- 0x3800c001 0xac200780 0x10000805 0x0403c500
- 0x04001001 0xe4204780 0x04011001 0xe4200780
+ 0x100ab003 0x00000780 0xd4148005 0x20000780
+ 0x1400c001 0x0423c780 0x40050025 0x00000780
+ 0x60040225 0x00024780 0x30101225 0xc4100780
+ 0x60040001 0x00024780 0xf0000001 0xe0000002
+ 0xf0000001 0xe0000002 0x30080001 0xac000782
+ 0x00020e05 0xc0000780 0x04011201 0xe4200780
+ 0x20000c19 0x04014782 0x3006d1fd 0x6c2107c8
+ 0x1000d003 0x00000280 0x861ffe03 0x00000000
+ 0x3004d1fd 0x6c20c7c8 0xa00c4003 0x00000000
+ 0x100c4003 0x00000280 0x1000d001 0x0423c780
+ 0x40014e05 0x00200780 0x30100205 0xc4100780
+ 0x60004e01 0x00204780 0x20000001 0x04010780
+ 0x30070005 0xc4100780 0x30060001 0xc4100780
+ 0x00020805 0xc0000780 0x20000201 0x04000780
+ 0xd4044805 0x20000780 0x2100e804 0x1500e000
+ 0x20108205 0x00000003 0xd00e0201 0xa0c00780
0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0x308509fd 0x6c4107c8 0xa00c9003 0x00000000
- 0x100c9003 0x00000280 0x00020805 0xc0000780
- 0xd4064009 0x20000780 0x20008805 0x00000007
- 0x3800c1fd 0x6c2047c8 0x10000805 0x0403c500
- 0x0002020d 0xc0000780 0x3800c001 0xac200780
- 0x1c00d005 0x0423c780 0x04001001 0xe4204780
- 0x04011001 0xe4200780 0xf0000001 0xe0000002
- 0x861ffe03 0x00000000 0x308609fd 0x6c4107c8
- 0x30000003 0x00000280 0x20288805 0x0000000b
- 0x00020209 0xc0000780 0x20208805 0x00000003
- 0x3800c1fd 0x6c2047c8 0x10000805 0x0403c500
- 0x0002020d 0xc0000780 0x00020805 0xc0000780
- 0x3800c005 0xac200780 0x1c00d001 0x0423c780
- 0x20188809 0x0000000b 0x04001001 0xe4200780
- 0x00020409 0xc0000780 0x04011001 0xe4204780
- 0x20108801 0x00000003 0x3801c1fd 0x6c2047c8
- 0x10000801 0x0403c500 0x0002000d 0xc0000780
- 0x3801c005 0xac200780 0x1c00d001 0x0423c780
- 0x20108809 0x0000000b 0x04001001 0xe4200780
- 0x00020409 0xc0000780 0x04011001 0xe4204780
- 0x20088801 0x00000003 0x3801c1fd 0x6c2047c8
- 0x10000801 0x0403c500 0x0002000d 0xc0000780
- 0x3801c005 0xac200780 0x1c00d001 0x0423c780
- 0x200c8809 0x0000000b 0x04001001 0xe4200780
- 0x00020409 0xc0000780 0x04011001 0xe4204780
- 0x20048801 0x00000003 0x3801c1fd 0x6c2047c8
- 0x10000801 0x0403c500 0x0002000d 0xc0000780
- 0x3801c005 0xac200780 0x1c00d001 0x0423c780
- 0x200a8809 0x0000000b 0x04001001 0xe4200780
- 0x00020409 0xc0000780 0x04011001 0xe4204780
- 0x20028801 0x00000003 0x3801c1fd 0x6c2047c8
- 0x10000801 0x0403c500 0x0002000d 0xc0000780
- 0x3801c005 0xac200780 0x1c00d001 0x0423c780
- 0x04001001 0xe4200780 0x307c09fd 0x6c0147c8
- 0x04011001 0xe4204780 0x30000003 0x00000280
- 0x1000ce01 0x0423c780 0x40014e05 0x00200780
- 0xd0044005 0x20000780 0x30100209 0xc4100780
- 0x1400c205 0x0423c780 0x60004e09 0x00208780
- 0x3401c1fd 0x6c2107c8 0x10048011 0x00000003
- 0x10208001 0x00000003 0x30070405 0xc4100780
- 0x3006040d 0xc4100780 0x21000801 0x0440c280
- 0x20000205 0x0400c780 0x00000005 0xc0000780
- 0x2101e800 0x2502e004 0x20208001 0x00000003
- 0xd00e0005 0xa0c00781
+ 0x00020805 0xc0000780 0xd4044805 0x20000780
+ 0x308709fd 0x6c4107c8 0xa00d7003 0x00000000
+ 0x1400c001 0x0423c780 0x100d7003 0x00000280
+ 0x00020805 0xc0000780 0xd408480d 0x20000780
+ 0xd4044809 0x20000780 0x1c00c001 0x0423c780
+ 0x3800c1fd 0x6c2107c8 0x1c00c001 0x0423c780
+ 0x20008805 0x0000000b 0x3800c001 0xac200780
+ 0x10000805 0x0403c500 0x04001201 0xe4204780
+ 0x04011201 0xe4200780 0xf0000001 0xe0000002
+ 0x861ffe03 0x00000000 0x308809fd 0x6c4107c8
+ 0xa00e6003 0x00000000 0x100e6003 0x00000280
+ 0x00020805 0xc0000780 0xd4064809 0x20000780
+ 0x20008805 0x00000007 0x3800c1fd 0x6c2047c8
+ 0x10000805 0x0403c500 0x0002020d 0xc0000780
+ 0x3800c001 0xac200780 0x1c00d205 0x0423c780
+ 0x04001201 0xe4204780 0x04011201 0xe4200780
+ 0xf0000001 0xe0000002 0x861ffe03 0x00000000
+ 0x308909fd 0x6c4107c8 0x30000003 0x00000280
+ 0x20298805 0x0000000b 0x00020209 0xc0000780
+ 0x20208805 0x00000003 0x3800c1fd 0x6c2047c8
+ 0x10000805 0x0403c500 0x0002020d 0xc0000780
+ 0x00020805 0xc0000780 0x3800c005 0xac200780
+ 0x1c00d201 0x0423c780 0x20198809 0x0000000b
+ 0x04001201 0xe4200780 0x00020409 0xc0000780
+ 0x04011201 0xe4204780 0x20108801 0x00000003
+ 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500
+ 0x0002000d 0xc0000780 0x3801c005 0xac200780
+ 0x1c00d201 0x0423c780 0x20118809 0x0000000b
+ 0x04001201 0xe4200780 0x00020409 0xc0000780
+ 0x04011201 0xe4204780 0x20088801 0x00000003
+ 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500
+ 0x0002000d 0xc0000780 0x3801c005 0xac200780
+ 0x1c00d201 0x0423c780 0x200d8809 0x0000000b
+ 0x04001201 0xe4200780 0x00020409 0xc0000780
+ 0x04011201 0xe4204780 0x20048801 0x00000003
+ 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500
+ 0x0002000d 0xc0000780 0x3801c005 0xac200780
+ 0x1c00d201 0x0423c780 0x200b8809 0x0000000b
+ 0x04001201 0xe4200780 0x00020409 0xc0000780
+ 0x04011201 0xe4204780 0x20028801 0x00000003
+ 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500
+ 0x0002000d 0xc0000780 0x3801c005 0xac200780
+ 0x1c00d201 0x0423c780 0x04001201 0xe4200780
+ 0x307c09fd 0x6c0147c8 0x04011201 0xe4204780
+ 0x30000003 0x00000280 0x1000d001 0x0423c780
+ 0x40014e05 0x00200780 0xd0044805 0x20000780
+ 0x30100209 0xc4100780 0x1400c205 0x0423c780
+ 0x60004e09 0x00208780 0x3401c1fd 0x6c2107c8
+ 0x10048011 0x00000003 0x10248001 0x00000003
+ 0x30070405 0xc4100780 0x3006040d 0xc4100780
+ 0x21000801 0x04428280 0x20000205 0x0400c780
+ 0x00000005 0xc0000780 0x2101e800 0x2502e004
+ 0x20208001 0x00000003 0xd00e0005 0xa0c00781
}
}
code {
@@ -1867,7 +1796,7 @@ code {
name = cudaEstimateResidual1
lmem = 0
smem = 2412
- reg = 9
+ reg = 7
bar = 1
const {
segname = const
@@ -1875,113 +1804,90 @@ code {
offset = 0
bytes = 32
mem {
- 0x000003ff 0x0000002f 0x0000001f 0x007fffff
- 0x0000007f 0x0000003f 0x0000000e 0x00000040
+ 0x000003ff 0x00000020 0x0000002f 0x0000001f
+ 0x007fffff 0x0000007f 0x0000003f 0x00000040
}
}
bincode {
- 0xd0800205 0x00400780 0xa0000209 0x04000780
- 0xa000000d 0x04000780 0x30050401 0xc4100780
- 0x20000611 0x04000780 0x308109fd 0x644107c8
- 0xa0011003 0x00000000 0x30020815 0xc4100780
- 0x10011003 0x00000280 0xa0004e01 0x04200780
- 0x30070005 0xc4100780 0x30060001 0xc4100780
- 0x20008200 0x2100ec00 0x20000a01 0x04000780
- 0xd00e0001 0x80c00780 0x00000a05 0xc0000780
- 0x04045201 0xe4200780 0x307c09fd 0x6c0147ca
- 0xa001f003 0x00000000 0x1001f003 0x00000280
- 0x1000d001 0x0423c780 0x40014c05 0x00200780
- 0x30100205 0xc4100780 0x60004c01 0x00204780
- 0x0004b201 0xe4200780 0xd012c809 0x20000780
- 0xd0118005 0x20000780 0x1100f000 0x1900e004
- 0x2100ee18 0x2541e000 0x30060001 0xac000780
- 0x0004b401 0xe4200780 0xf0000001 0xe0000002
- 0x861ffe03 0x00000000 0xd012d005 0x20000780
- 0x3404c1fd 0x6c20c7c8 0xa0030003 0x00000000
- 0x1002f003 0x00000280 0xd012c809 0x20000780
- 0xd0115005 0x20000780 0x1900e000 0x2500e000
- 0x20000801 0x04000780 0x30020001 0xc4100780
- 0x2000ca01 0x04200780 0xd00e0005 0x80c00780
- 0x1400d401 0x0423c780 0x30000201 0xec000780
- 0x10030003 0x00000780 0x1000f801 0x0403c780
- 0x00000a05 0xc0000782 0x30820805 0x6c40c7d0
- 0x04001201 0xe4200780 0xa00003fd 0x0c0147c8
- 0xa0048003 0x00000000 0x10048003 0x00001100
- 0xd012d005 0x20000780 0x2000d001 0x04210780
- 0x3400c1fd 0x6c20c7d8 0x00020005 0xc0000780
- 0xa0047003 0x00000000 0x10046003 0x00001280
- 0xd012c80d 0x20000780 0xd0115009 0x20000780
- 0x1d00e000 0x2104f004 0x2900e000 0x20008200
- 0x30020001 0xc4100780 0x2000ca01 0x04200780
- 0xd00e0005 0x80c00780 0x1800d401 0x0423c780
- 0x30000201 0xec000780 0x10047003 0x00000780
- 0x1000f801 0x0403c780 0x04001201 0xe4200782
- 0xf0000001 0xe0000002 0x861ffe03 0x00000000
- 0xd0114805 0x20000780 0x3500e001 0x00000003
- 0x307c01fd 0x6c0187d8 0x1000f819 0x0403c780
- 0x1005e003 0x00001280 0xd0114809 0x20000780
- 0x20098805 0x00000003 0x3802c001 0xc4300780
- 0x00020205 0xc0000780 0x1024801d 0x00000003
- 0x2800c005 0x04210780 0x20248021 0x00000003
- 0x00000e09 0xc0000780 0xd8118009 0x20000780
- 0x1800c001 0x0423c780 0x20048e1d 0x00000003
- 0x6600c219 0x80218780 0x30080ffd 0x640147d8
- 0x10056003 0x00001280 0x10060003 0x00000780
- 0xd0114805 0x20000780 0x2400c005 0x04210780
- 0xd0115805 0x20000780 0x1400c001 0x0423c780
- 0x30000c01 0xec000780 0x00020205 0xc0000780
- 0x2440d201 0x04200780 0xd012d005 0x20000780
- 0x301f0019 0xec100780 0x3001001d 0xc4100780
- 0x3401c001 0x6c210780 0xd0070c05 0x04008780
- 0xa0000001 0x2c014780 0x30830205 0xac400780
- 0x40010001 0x00018780 0x00000a05 0xc0000780
- 0x04025201 0xe4200780 0x861ffe03 0x00000000
- 0x308409fd 0x6c4107d8 0x00000a05 0xc0001500
- 0xd40d480d 0x20001500 0xd4094809 0x20001500
- 0x1c00c001 0x0423d500 0x2800c001 0x04201500
- 0x04025201 0xe4201500 0x861ffe03 0x00000000
- 0x308509fd 0x6c4107d8 0x00000a05 0xc0001500
- 0xd40b480d 0x20001500 0xd4094809 0x20001500
- 0x1c00c001 0x0423d500 0x2800c001 0x04201500
- 0x04025201 0xe4201500 0x861ffe03 0x00000000
- 0x00000a05 0xc0000680 0xd40a480d 0x20000680
- 0xd4094809 0x20000680 0x1c00c001 0x0423c680
- 0x2800c001 0x04200680 0x04025201 0xe4200680
- 0x861ffe03 0x00000000 0xa0096003 0x00000000
- 0x10096003 0x00000100 0x00000a05 0xc0000780
- 0xd4094809 0x20000780 0x1800e001 0x0423c780
- 0x2800c001 0x04200780 0x04025201 0xe4200780
- 0x1900f000 0x2900e000 0x04025201 0xe4200780
- 0x1900e800 0x2900e000 0x04025201 0xe4200780
- 0x1900e400 0x2900e000 0x04025201 0xe4200780
- 0x1900e200 0x2900e000 0x04025201 0xe4200780
- 0x307c05fd 0x640147ca 0x30000003 0x00000280
+ 0xd0800209 0x00400780 0xa0000001 0x04000780
+ 0x60200401 0x00000003 0x308201fd 0x644107c8
+ 0xa000f003 0x00000000 0x3002000d 0xc4100780
+ 0x1000f003 0x00000280 0xa0004e05 0x04200780
+ 0x30070209 0xc4100780 0x30060205 0xc4100780
+ 0x20018404 0x2101ec04 0x20000605 0x04004780
+ 0xd00e0205 0x80c00780 0x00000605 0xc0000780
+ 0x04045201 0xe4204780 0x307c0005 0x6c0087d2
+ 0xa00003fd 0x0c0147c8 0xa001e003 0x00000000
+ 0x1001e003 0x00001100 0x1000d005 0x0423c780
+ 0x40034c09 0x00200780 0x30100409 0xc4100780
+ 0x60024c05 0x00208780 0x0004b201 0xe4204780
0xd012c809 0x20000780 0xd0114805 0x20000780
- 0x1900e000 0x2540ee00 0x347ccbfd 0x6c2147c8
- 0x3400c005 0x04200780 0x100a1003 0x00000280
- 0xd0094805 0x20000780 0x347cc1fd 0x6c2087c8
- 0x100a3003 0x00000280 0x10018001 0x00000003
- 0x100a4003 0x00000780 0x1000f801 0x0403c780
- 0x3001d009 0xac200780 0x20018605 0x00000003
- 0x307c0409 0x8c000780 0x40030811 0x00000780
- 0x60020a11 0x00010780 0x30860619 0x64410780
- 0xd0094805 0x20000780 0x30100815 0xc4100780
- 0xa0000c19 0x2c014780 0x30010411 0xec100780
- 0x60020809 0x00014780 0x407f8c15 0x0007ffff
- 0x2544e004 0x20058408 0x30030205 0xec000780
- 0x20000205 0x04008780 0x40030009 0x00000780
- 0x60020209 0x00008780 0x30100409 0xc4100780
- 0x60020001 0x00008780 0x00020605 0xc0000780
- 0x04025201 0xe4200780 0xd4094809 0x20000780
- 0x1900e800 0x1900f804 0x3800c001 0xac200780
- 0x3801d005 0xac200780 0x30010001 0xac000780
- 0x307c07fd 0x640147c8 0x04025201 0xe4200780
- 0x30000003 0x00000280 0xd4094805 0x20000780
- 0xa0004c01 0x04200780 0x1500e204 0x1500e608
- 0x61002e01 0x00000007 0x3401c005 0xac200780
- 0x3402c409 0xac200780 0x30020001 0xc4100780
- 0x30020205 0xac000780 0x2000c801 0x04200780
- 0xd00e0005 0xa0c00781
+ 0x1100f004 0x1900e008 0x2501e010 0x2542ee04
+ 0x30040205 0xac000780 0x0004b401 0xe4204780
+ 0xf0000001 0xe0000002 0x861ffe03 0x00000000
+ 0xd012d005 0x20000780 0x3400c1fd 0x6c20c7d8
+ 0xa002f003 0x00000000 0x1002e003 0x00001280
+ 0xd012c809 0x20000780 0xd0115005 0x20000780
+ 0x1900e004 0x2501e004 0x20000005 0x04004780
+ 0x30020205 0xc4100780 0x2000ca05 0x04204780
+ 0xd00e0209 0x80c00780 0x1400d405 0x0423c780
+ 0x30010405 0xec000780 0x1002f003 0x00000780
+ 0x1000f805 0x0403c780 0x00000605 0xc0000782
+ 0x30830009 0x6c40c7e0 0x04001201 0xe4204780
+ 0xa00005fd 0x0c0147d8 0xa0047003 0x00000000
+ 0x10047003 0x00002100 0xd012d005 0x20000780
+ 0x2000d005 0x04200780 0x3401c1fd 0x6c20c7e8
+ 0x00020205 0xc0000780 0xa0046003 0x00000000
+ 0x10045003 0x00002280 0xd012c80d 0x20000780
+ 0xd0115009 0x20000780 0x1d00e004 0x2100f008
+ 0x2901e004 0x20018404 0x30020205 0xc4100780
+ 0x2000ca05 0x04204780 0xd00e0209 0x80c00780
+ 0x1800d405 0x0423c780 0x30010405 0xec000780
+ 0x10046003 0x00000780 0x1000f805 0x0403c780
+ 0x04001201 0xe4204782 0xf0000001 0xe0000002
+ 0x861ffe03 0x00000000 0xd0114805 0x20000780
+ 0x3500e005 0x00000003 0x307c03fd 0x6c0187e8
+ 0x1000f811 0x0403c780 0x1005d003 0x00002280
+ 0xd0114809 0x20000780 0x20098009 0x00000003
+ 0x3802c005 0xc4300780 0x00020405 0xc0000780
+ 0x10248015 0x00000003 0x2800c009 0x04200780
+ 0x20248219 0x00000003 0x00000a09 0xc0000780
+ 0xd8118009 0x20000780 0x1800c005 0x0423c780
+ 0x20048a15 0x00000003 0x6601c211 0x80210780
+ 0x30060bfd 0x640147e8 0x10055003 0x00002280
+ 0x1005f003 0x00000780 0xd0114805 0x20000780
+ 0x2400c009 0x04200780 0xd0115805 0x20000780
+ 0x1400c005 0x0423c780 0x30010805 0xec000780
+ 0x00020405 0xc0000780 0x2440d205 0x04204780
+ 0xd012d005 0x20000780 0x301f0211 0xec100780
+ 0x30010215 0xc4100780 0x3402c005 0x6c210780
+ 0xd0050809 0x04008780 0xa0000205 0x2c014780
+ 0x30840409 0xac400780 0x40020205 0x00018780
+ 0x00000605 0xc0000780 0x04025201 0xe4204780
+ 0x861ffe03 0x00000000 0x308501fd 0x6c4107e8
+ 0x00000605 0xc0002500 0xd40d480d 0x20002500
+ 0xd4094809 0x20002500 0x1c00c005 0x0423e500
+ 0x2800c005 0x04206500 0x04025201 0xe4206500
+ 0x861ffe03 0x00000000 0x308601fd 0x6c4107e8
+ 0x00000605 0xc0002500 0xd40b480d 0x20002500
+ 0xd4094809 0x20002500 0x1c00c001 0x0423e500
+ 0x2800c001 0x04202500 0x04025201 0xe4202500
+ 0x861ffe03 0x00000000 0x00000605 0xc0001680
+ 0xd40a480d 0x20001680 0xd4094809 0x20001680
+ 0x1c00c001 0x0423d680 0x2800c001 0x04201680
+ 0x04025201 0xe4201680 0x861ffe03 0x00000000
+ 0xa0095003 0x00000000 0x10095003 0x00001100
+ 0x00000605 0xc0000780 0xd4094809 0x20000780
+ 0x1800e001 0x0423c780 0x2800c001 0x04200780
+ 0x04025201 0xe4200780 0x1900f000 0x2900e000
+ 0x04025201 0xe4200780 0x1900e800 0x2900e000
+ 0x04025201 0xe4200780 0x1900e400 0x2900e000
+ 0x04025201 0xe4200780 0x1900e200 0x2900e000
+ 0x04025201 0xe4200780 0xf0000001 0xe0000002
+ 0x30000003 0x00000100 0xa0004c01 0x04200780
+ 0x61002e01 0x00000007 0xd0094805 0x20000780
+ 0x30020005 0xc4100780 0x1500e000 0x2101e804
+ 0xd00e0201 0xa0c00781
}
}
code {