diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs index 3a3e528..a4fd0bf 100644 --- a/CUETools.FlaCuda/FlaCudaWriter.cs +++ b/CUETools.FlaCuda/FlaCudaWriter.cs @@ -557,131 +557,152 @@ namespace CUETools.Codecs.FlaCuda } } - static uint rice_encode_count(uint sum, uint n, uint k) - { - return n * (k + 1) + ((sum - (n >> 1)) >> (int)k); - } - - //static unsafe uint find_optimal_rice_param(uint sum, uint n) - //{ - // uint* nbits = stackalloc uint[Flake.MAX_RICE_PARAM + 1]; - // int k_opt = 0; - - // nbits[0] = UINT32_MAX; - // for (int k = 0; k <= Flake.MAX_RICE_PARAM; k++) - // { - // nbits[k] = rice_encode_count(sum, n, (uint)k); - // if (nbits[k] < nbits[k_opt]) - // k_opt = k; - // } - // return (uint)k_opt; - //} - - static unsafe int find_optimal_rice_param(uint sum, uint n, out uint nbits_best) - { - int k_opt = 0; - uint a = n; - uint b = sum - (n >> 1); - uint nbits = a + b; - for (int k = 1; k <= Flake.MAX_RICE_PARAM; k++) - { - a += n; - b >>= 1; - uint nbits_k = a + b; - if (nbits_k < nbits) - { - k_opt = k; - nbits = nbits_k; - } - } - nbits_best = nbits; - return k_opt; - } - - static unsafe uint calc_optimal_rice_params(ref RiceContext rc, int porder, uint* sums, uint n, uint pred_order) + static unsafe uint calc_optimal_rice_params(int porder, int* parm, uint* sums, uint n, uint pred_order) { uint part = (1U << porder); - uint all_bits = 0; - rc.rparams[0] = find_optimal_rice_param(sums[0], (n >> porder) - pred_order, out all_bits); - uint cnt = (n >> porder); + uint cnt = (n >> porder) - pred_order; + int k = cnt > 0 ? 
Math.Min(Flake.MAX_RICE_PARAM, BitReader.log2i(sums[0] / cnt)) : 0; + uint all_bits = cnt * ((uint)k + 1U) + (sums[0] >> k); + parm[0] = k; + cnt = (n >> porder); for (uint i = 1; i < part; i++) { - uint nbits; - rc.rparams[i] = find_optimal_rice_param(sums[i], cnt, out nbits); - all_bits += nbits; + k = Math.Min(Flake.MAX_RICE_PARAM, BitReader.log2i(sums[i] / cnt)); + all_bits += cnt * ((uint)k + 1U) + (sums[i] >> k); + parm[i] = k; } - all_bits += (4 * part); - rc.porder = porder; - return all_bits; + return all_bits + (4 * part); } - static unsafe void calc_sums(int pmin, int pmax, int* data, uint n, uint pred_order, uint* sums) + static unsafe void calc_lower_sums(int pmin, int pmax, uint* sums) { - // sums for highest level - int parts = (1 << pmax); - int* res = data + pred_order; - uint cnt = (n >> pmax) - pred_order; - uint sum = 0; - for (uint j = cnt; j > 0; j--) - { - int val = *(res++); - sum += (uint)((val << 1) ^ (val >> 31)); - } - sums[pmax * Flake.MAX_PARTITIONS + 0] = sum; - cnt = (n >> pmax); - for (int i = 1; i < parts; i++) - { - sum = 0; - for (uint j = cnt; j > 0; j--) - { - int val = *(res++); - sum += (uint)((val << 1) ^ (val >> 31)); - } - sums[pmax * Flake.MAX_PARTITIONS + i] = sum; - } - // sums for lower levels for (int i = pmax - 1; i >= pmin; i--) { - parts = (1 << i); - for (int j = 0; j < parts; j++) + for (int j = 0; j < (1 << i); j++) { - sums[i * Flake.MAX_PARTITIONS + j] = - sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j] + + sums[i * Flake.MAX_PARTITIONS + j] = + sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j] + sums[(i + 1) * Flake.MAX_PARTITIONS + 2 * j + 1]; } } } - static unsafe uint calc_rice_params(ref RiceContext rc, ref RiceContext tmp_rc, int pmin, int pmax, int* data, uint n, uint pred_order) + static unsafe void calc_sums(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums) { - //uint* udata = stackalloc uint[(int)n]; + int parts = (1 << pmax); + uint* res = data + pred_order; + uint cnt = (n >> pmax) - 
pred_order; + uint sum = 0; + for (uint j = cnt; j > 0; j--) + sum += *(res++); + sums[0] = sum; + cnt = (n >> pmax); + for (int i = 1; i < parts; i++) + { + sum = 0; + for (uint j = cnt; j > 0; j--) + sum += *(res++); + sums[i] = sum; + } + } + + /// + /// Special case when (n >> pmax) == 18 + /// + /// + /// + /// + /// + /// + /// + static unsafe void calc_sums18(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums) + { + int parts = (1 << pmax); + uint* res = data + pred_order; + uint cnt = 18 - pred_order; + uint sum = 0; + for (uint j = cnt; j > 0; j--) + sum += *(res++); + sums[0] = sum; + for (int i = 1; i < parts; i++) + { + sums[i] = + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++); + } + } + + /// + /// Special case when (n >> pmax) == 16 + /// + /// + /// + /// + /// + /// + /// + static unsafe void calc_sums16(int pmin, int pmax, uint* data, uint n, uint pred_order, uint* sums) + { + int parts = (1 << pmax); + uint* res = data + pred_order; + uint cnt = 16 - pred_order; + uint sum = 0; + for (uint j = cnt; j > 0; j--) + sum += *(res++); + sums[0] = sum; + for (int i = 1; i < parts; i++) + { + sums[i] = + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++) + + *(res++) + *(res++) + *(res++) + *(res++); + } + } + + static unsafe uint calc_rice_params(RiceContext rc, int pmin, int pmax, int* data, uint n, uint pred_order) + { + uint* udata = stackalloc uint[(int)n]; uint* sums = stackalloc uint[(pmax + 1) * Flake.MAX_PARTITIONS]; + int* parm = stackalloc int[(pmax + 1) * Flake.MAX_PARTITIONS]; //uint* bits = stackalloc uint[Flake.MAX_PARTITION_ORDER]; //assert(pmin >= 0 && pmin <= Flake.MAX_PARTITION_ORDER); //assert(pmax >= 0 && pmax <= Flake.MAX_PARTITION_ORDER); //assert(pmin <= pmax); - //for (uint i = 
0; i < n; i++) - // udata[i] = (uint) ((2 * data[i]) ^ (data[i] >> 31)); + for (uint i = 0; i < n; i++) + udata[i] = (uint)((data[i] << 1) ^ (data[i] >> 31)); - calc_sums(pmin, pmax, data, n, pred_order, sums); + // sums for highest level + if ((n >> pmax) == 18) + calc_sums18(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS); + else if ((n >> pmax) == 16) + calc_sums16(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS); + else + calc_sums(pmin, pmax, udata, n, pred_order, sums + pmax * Flake.MAX_PARTITIONS); + // sums for lower levels + calc_lower_sums(pmin, pmax, sums); - int opt_porder = pmin; uint opt_bits = AudioSamples.UINT32_MAX; + int opt_porder = pmin; for (int i = pmin; i <= pmax; i++) { - uint bits = calc_optimal_rice_params(ref tmp_rc, i, sums + i * Flake.MAX_PARTITIONS, n, pred_order); + uint bits = calc_optimal_rice_params(i, parm + i * Flake.MAX_PARTITIONS, sums + i * Flake.MAX_PARTITIONS, n, pred_order); if (bits <= opt_bits) { - opt_porder = i; opt_bits = bits; - RiceContext tmp_rc2 = rc; - rc = tmp_rc; - tmp_rc = tmp_rc2; + opt_porder = i; } } + rc.porder = opt_porder; + fixed (int* rparms = rc.rparams) + AudioSamples.MemCpy(rparms, parm + opt_porder * Flake.MAX_PARTITIONS, (1 << opt_porder)); + return opt_bits; } @@ -990,7 +1011,7 @@ namespace CUETools.Codecs.FlaCuda int pmin = get_max_p_order(eparams.min_partition_order, frame.blocksize, frame.subframes[ch].best.order); int pmax = get_max_p_order(eparams.max_partition_order, frame.blocksize, frame.subframes[ch].best.order); uint bits = (uint)frame.subframes[ch].best.order * frame.subframes[ch].obits + 6; - frame.subframes[ch].best.size = bits + calc_rice_params(ref frame.subframes[ch].best.rc, ref frame.current.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order); + frame.subframes[ch].best.size = bits + calc_rice_params(frame.subframes[ch].best.rc, pmin, pmax, frame.subframes[ch].best.residual, 
(uint)frame.blocksize, (uint)frame.subframes[ch].best.order); } break; case SubframeType.LPC: @@ -1009,7 +1030,7 @@ namespace CUETools.Codecs.FlaCuda int pmax = get_max_p_order(eparams.max_partition_order, frame.blocksize, frame.subframes[ch].best.order); uint bits = (uint)frame.subframes[ch].best.order * frame.subframes[ch].obits + 4 + 5 + (uint)frame.subframes[ch].best.order * (uint)frame.subframes[ch].best.cbits + 6; //uint oldsize = frame.subframes[ch].best.size; - frame.subframes[ch].best.size = bits + calc_rice_params(ref frame.subframes[ch].best.rc, ref frame.current.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order); + frame.subframes[ch].best.size = bits + calc_rice_params(frame.subframes[ch].best.rc, pmin, pmax, frame.subframes[ch].best.residual, (uint)frame.blocksize, (uint)frame.subframes[ch].best.order); //if (frame.subframes[ch].best.size > frame.subframes[ch].obits * (uint)frame.blocksize && // oldsize <= frame.subframes[ch].obits * (uint)frame.blocksize) // throw new Exception("oops"); @@ -1213,9 +1234,10 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(task.cudaChooseBestMethod, 0 * sizeof(uint), (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaChooseBestMethod, 1 * sizeof(uint), (uint)task.cudaResidualOutput.Pointer); - cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)residualPartCount); - cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)task.nResidualTasksPerChannel); - cuda.SetParameterSize(task.cudaChooseBestMethod, sizeof(uint) * 4U); + cuda.SetParameter(task.cudaChooseBestMethod, 2 * sizeof(uint), (uint)residualPartSize); + cuda.SetParameter(task.cudaChooseBestMethod, 3 * sizeof(uint), (uint)residualPartCount); + cuda.SetParameter(task.cudaChooseBestMethod, 4 * sizeof(uint), (uint)task.nResidualTasksPerChannel); + cuda.SetParameterSize(task.cudaChooseBestMethod, 5U * sizeof(uint)); 
cuda.SetFunctionBlockShape(task.cudaChooseBestMethod, 32, 8, 1); cuda.SetParameter(task.cudaCopyBestMethod, 0, (uint)task.cudaBestResidualTasks.Pointer); diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu index 21d7805..a40d99f 100644 --- a/CUETools.FlaCuda/flacuda.cu +++ b/CUETools.FlaCuda/flacuda.cu @@ -61,6 +61,7 @@ typedef struct #define SUM512(buf,tid,op) if (tid < 256) buf[tid] op buf[tid + 256]; __syncthreads(); SUM256(buf,tid,op) #define FSQR(s) ((s)*(s)) +#define FASTMUL(a,b) __mul24(a,b) extern "C" __global__ void cudaStereoDecorr( int *samples, @@ -581,14 +582,8 @@ extern "C" __global__ void cudaEstimateResidual( shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24]; shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] + shared.residual[tid + 6]; - shared.residual[tid] += shared.residual[tid + 1]; - - // rice parameter search - shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) * - (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x)); - shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12])); if (threadIdx.x == 0) - output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3])); + output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1]; } extern "C" __global__ void cudaEstimateResidual1( @@ -612,7 +607,7 @@ extern "C" __global__ void cudaEstimateResidual1( if (tid == 0) { shared.pos = blockIdx.x * partSize; - shared.dataLen = min(shared.task.data.blocksize - shared.pos, partSize + max_order); + shared.dataLen = 
min(shared.task.data.blocksize - shared.pos, partSize + shared.task.data.residualOrder); } __syncthreads(); @@ -631,22 +626,10 @@ extern "C" __global__ void cudaEstimateResidual1( shared.residual[tid] = __mul24(ptr < shared.dataLen, min(0x7fffff,(sum << 1) ^ (sum >> 31))); __syncthreads(); SUM256(shared.residual, tid, +=); - - if (threadIdx.y == 0) - { - const int residualLen = max(0,min(shared.task.data.blocksize - shared.pos - shared.task.data.residualOrder, partSize)); - - // rice parameter search - shared.residual[threadIdx.x] = (shared.task.data.type != Constant || shared.residual[0] != 0) * - (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[0] - (residualLen >> 1)) >> threadIdx.x)); - shared.residual[threadIdx.x] = min(min(shared.residual[threadIdx.x], shared.residual[threadIdx.x + 4]), min(shared.residual[threadIdx.x + 8], shared.residual[threadIdx.x + 12])); - if (threadIdx.x == 0) - output[blockIdx.y * 64 + blockIdx.x] = min(min(shared.residual[threadIdx.x], shared.residual[threadIdx.x + 1]), min(shared.residual[threadIdx.x + 2], shared.residual[threadIdx.x + 3])); - } + if (tid == 0) + output[blockIdx.y * 64 + blockIdx.x] = shared.residual[0]; } -#define FASTMUL(a,b) __mul24(a,b) - extern "C" __global__ void cudaEstimateResidual8( int*output, int*samples, @@ -656,57 +639,61 @@ extern "C" __global__ void cudaEstimateResidual8( ) { __shared__ struct { - int data[32*9]; + volatile int data[32*9]; volatile int residual[32*8]; FlaCudaSubframeData task[8]; int coefs[32*8]; + volatile int pos; + volatile int dataLen; + volatile int dataOffs; } shared; const int tid = threadIdx.x + threadIdx.y * 32; + const int taskNo = FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y; if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int)) - ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[blockIdx.y * blockDim.y + threadIdx.y]))[threadIdx.x]; + ((int*)&shared.task[threadIdx.y])[threadIdx.x] = 
((int*)(&tasks[taskNo]))[threadIdx.x]; + const int ro = shared.task[threadIdx.y].residualOrder; + shared.coefs[tid] = threadIdx.x < ro ? tasks[taskNo].coefs[threadIdx.x] : 0; + if (tid == 0) + { + shared.pos = FASTMUL(blockIdx.x, partSize); + shared.dataLen = min(shared.task[0].blocksize - shared.pos, partSize + max_order); + shared.dataOffs = shared.task[0].samplesOffs + shared.pos; + } __syncthreads(); - const int pos = blockIdx.x * partSize; - const int dataLen = min(shared.task[0].blocksize - pos, partSize + max_order); // fetch samples - shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] >> shared.task[0].wbits : 0; - if (tid < 32) shared.data[tid + partSize] = tid + partSize < dataLen ? samples[shared.task[0].samplesOffs + pos + tid + partSize] >> shared.task[0].wbits : 0; + if (tid < shared.dataLen) + shared.data[tid] = samples[shared.dataOffs + tid] >> shared.task[0].wbits; + if (tid + partSize < shared.dataLen) + shared.data[tid + partSize] = samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits; __syncthreads(); - shared.residual[tid] = 0; - shared.coefs[tid] = threadIdx.x < shared.task[threadIdx.y].residualOrder ? 
tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0; - - const int residualLen = max(0,min(shared.task[0].blocksize - pos - shared.task[threadIdx.y].residualOrder, partSize)); + const int residualLen = max(0,min(shared.dataLen - ro, partSize)); const int ptr2 = threadIdx.y << 5; int s = 0; - for (int ptr = threadIdx.x + blockDim.y * 32 * (shared.task[threadIdx.y].type == Verbatim); ptr < blockDim.y * 32 + threadIdx.x; ptr += 32) + for (int ptr = threadIdx.x; ptr < residualLen; ptr += 32) { // compute residual int sum = __mul24(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) + __mul24(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) + __mul24(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) + - __mul24(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) + + __mul24(shared.data[ptr + 3], shared.coefs[ptr2 + 3]); + sum += __mul24(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) + __mul24(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) + __mul24(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) + __mul24(shared.data[ptr + 7], shared.coefs[ptr2 + 7]); - sum = shared.data[ptr + shared.task[threadIdx.y].residualOrder] - (sum >> shared.task[threadIdx.y].shift); - s += __mul24(ptr < residualLen, min(0x7fffff,(sum << 1) ^ (sum >> 31))); + sum = shared.data[ptr + ro] - (sum >> shared.task[threadIdx.y].shift); + s += min(0x7fffff,(sum << 1) ^ (sum >> 31)); } shared.residual[tid] = s; shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24]; shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] + shared.residual[tid + 6]; - shared.residual[tid] += shared.residual[tid + 1]; - - // rice parameter search - shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) * - (__mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x)); - 
shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12])); if (threadIdx.x == 0) - output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3])); + output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1]; } extern "C" __global__ void cudaEstimateResidual12( @@ -727,8 +714,11 @@ extern "C" __global__ void cudaEstimateResidual12( volatile int dataOffs; } shared; const int tid = threadIdx.x + threadIdx.y * 32; + const int taskNo = FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y; if (threadIdx.x < sizeof(FlaCudaSubframeData)/sizeof(int)) - ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y]))[threadIdx.x]; + ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[taskNo]))[threadIdx.x]; + const int ro = shared.task[threadIdx.y].residualOrder; + shared.coefs[tid] = threadIdx.x < ro ? tasks[taskNo].coefs[threadIdx.x] : 0; if (tid == 0) { shared.pos = FASTMUL(blockIdx.x, partSize); @@ -738,30 +728,30 @@ extern "C" __global__ void cudaEstimateResidual12( __syncthreads(); // fetch samples - shared.data[tid] = tid < shared.dataLen ? samples[shared.dataOffs + tid] >> shared.task[0].wbits : 0; - if (tid < 32) shared.data[tid + partSize] = tid + partSize < shared.dataLen ? 
samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits : 0; + if (tid < shared.dataLen) + shared.data[tid] = samples[shared.dataOffs + tid] >> shared.task[0].wbits; + if (tid + partSize < shared.dataLen) + shared.data[tid + partSize] = samples[shared.dataOffs + tid + partSize] >> shared.task[0].wbits; __syncthreads(); - const int ro = shared.task[threadIdx.y].residualOrder; - const int residualLen = max(0,min(shared.task[0].blocksize - shared.pos - ro, partSize)); + int residualLen = max(0,min(shared.dataLen - ro, partSize)); const int ptr2 = threadIdx.y << 5; - - shared.coefs[tid] = threadIdx.x < ro ? tasks[FASTMUL(blockIdx.y, blockDim.y) + threadIdx.y].coefs[threadIdx.x] : 0; - int s = 0; - for (int ptr = shared.task[threadIdx.y].type == Verbatim ? residualLen : threadIdx.x; ptr < residualLen; ptr += 32) + for (int ptr = threadIdx.x; ptr < residualLen; ptr += 32) { // compute residual int sum = FASTMUL(shared.data[ptr + 0], shared.coefs[ptr2 + 0]) + FASTMUL(shared.data[ptr + 1], shared.coefs[ptr2 + 1]) + FASTMUL(shared.data[ptr + 2], shared.coefs[ptr2 + 2]) + - FASTMUL(shared.data[ptr + 3], shared.coefs[ptr2 + 3]) + + FASTMUL(shared.data[ptr + 3], shared.coefs[ptr2 + 3]); + sum += FASTMUL(shared.data[ptr + 4], shared.coefs[ptr2 + 4]) + FASTMUL(shared.data[ptr + 5], shared.coefs[ptr2 + 5]) + FASTMUL(shared.data[ptr + 6], shared.coefs[ptr2 + 6]) + - FASTMUL(shared.data[ptr + 7], shared.coefs[ptr2 + 7]) + + FASTMUL(shared.data[ptr + 7], shared.coefs[ptr2 + 7]); + sum += FASTMUL(shared.data[ptr + 8], shared.coefs[ptr2 + 8]) + FASTMUL(shared.data[ptr + 9], shared.coefs[ptr2 + 9]) + FASTMUL(shared.data[ptr + 10], shared.coefs[ptr2 + 10]) + @@ -773,19 +763,14 @@ extern "C" __global__ void cudaEstimateResidual12( shared.residual[tid] = s; shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 8] + shared.residual[tid + 16] + shared.residual[tid + 24]; shared.residual[tid] = shared.residual[tid] + shared.residual[tid + 2] + shared.residual[tid + 4] 
+ shared.residual[tid + 6]; - shared.residual[tid] += shared.residual[tid + 1]; - - // rice parameter search - shared.residual[tid] = (shared.task[threadIdx.y].type != Constant || shared.residual[threadIdx.y << 5] != 0) * - (__mul24(threadIdx.x >= 15, 0x7fffff) + FASTMUL(residualLen, threadIdx.x + 1) + ((shared.residual[threadIdx.y << 5] - (residualLen >> 1)) >> threadIdx.x)); - shared.residual[tid] = min(min(shared.residual[tid], shared.residual[tid + 4]), min(shared.residual[tid + 8], shared.residual[tid + 12])); if (threadIdx.x == 0) - output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = min(min(shared.residual[tid], shared.residual[tid + 1]), min(shared.residual[tid + 2], shared.residual[tid + 3])); + output[(blockIdx.y * blockDim.y + threadIdx.y) * 64 + blockIdx.x] = shared.residual[tid] + shared.residual[tid + 1]; } extern "C" __global__ void cudaChooseBestMethod( FlaCudaSubframeTask *tasks, int *residual, + int partSize, int partCount, // <= blockDim.y (256) int taskCount ) @@ -806,8 +791,18 @@ extern "C" __global__ void cudaChooseBestMethod( ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(tasks + task + threadIdx.y + taskCount * blockIdx.y))[threadIdx.x]; int sum = 0; - for (int pos = 0; pos < partCount; pos += blockDim.x) - sum += (pos + threadIdx.x < partCount ? 
residual[pos + threadIdx.x + 64 * (task + threadIdx.y + taskCount * blockIdx.y)] : 0); + for (int pos = threadIdx.x; pos < partCount; pos += blockDim.x) + { + // fetch part sum + int psum = residual[pos + 64 * (task + threadIdx.y + taskCount * blockIdx.y)]; + // calculate part size + int residualLen = max(0,min(shared.task[threadIdx.y].data.blocksize - FASTMUL(pos, partSize) - shared.task[threadIdx.y].data.residualOrder, partSize)); + residualLen = FASTMUL(residualLen, shared.task[threadIdx.y].data.type != Constant || psum != 0); + // calculate rice parameter + int k = max(0, min(14, __float2int_rz(__log2f((psum + 0.000001f) / (residualLen + 0.000001f) + 0.5f)))); + // calculate part bit length + sum += FASTMUL(residualLen, k + 1) + (psum >> k); + } shared.partLen[tid] = sum; // length sum: reduction in shared mem diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin index 11b0cbe..ea93441 100644 --- a/CUETools.FlaCuda/flacuda.cubin +++ b/CUETools.FlaCuda/flacuda.cubin @@ -562,128 +562,99 @@ code { name = cudaEstimateResidual12 lmem = 0 smem = 3760 - reg = 15 + reg = 13 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 24 + bytes = 12 mem { - 0x000003ff 0x0000000f 0x0000001f 0x00000001 - 0x007fffff 0x0000000e + 0x000003ff 0x0000000f 0x007fffff } } bincode { - 0xd0800205 0x00400780 0xa0000021 0x04000780 - 0xa0000219 0x04000780 0x308111fd 0x644107c8 - 0xa0013003 0x00000000 0x30060c1d 0xc4100780 - 0x10013003 0x00000280 0xa0004e01 0x04200780 - 0xa0004405 0x04200780 0x40418000 0x20008c00 - 0x30070005 0xc4100780 0x30060001 0xc4100780 - 0x20000205 0x04000780 0x30021001 0xc4100780 - 0x2101ec08 0x20008e04 0x20000001 0x04008780 - 0xd00e0001 0x80c00780 0x00000205 0xc0000780 - 0x04045201 0xe4200780 0x30050c01 0xc4100782 - 0x20000025 0x040207c0 0xa0022003 0x00000000 - 0x10022003 0x00000280 0xa0004c01 0x04200780 - 0x4000d001 0x00218780 0x00075201 0xe4200780 - 0xd01d4809 0x20000780 0xd0115005 0x20000780 - 0x1100f000 0x1900e004 0x2100ee08 
0x2541ec00 - 0x30020001 0xac000780 0x00075401 0xe4200780 - 0x1900e000 0x2500e000 0x00075601 0xe4200780 + 0xd0800205 0x00400780 0xa0000019 0x04000780 + 0xa0000215 0x04000780 0x30810dfd 0x644107c8 + 0xa0013003 0x00000000 0x30060a01 0xc4100780 + 0x10013003 0x00000280 0xa0004e05 0x04200780 + 0xa0004409 0x04200780 0x40428204 0x20018a04 + 0x30070209 0xc4100780 0x30060205 0xc4100780 + 0x20000409 0x04004780 0x30020c05 0xc4100780 + 0x2102ec0c 0x20018008 0x20000205 0x0400c780 + 0xd00e0205 0x80c00780 0x00000405 0xc0000780 + 0x04045201 0xe4204780 0x00000005 0xc0000782 + 0xd4114809 0x20000780 0x3806c1fd 0x6420c7c8 + 0xa0025003 0x00000000 0x1800c001 0x0423c780 + 0x10024003 0x00000280 0xa0004e05 0x04200780 + 0xa0004409 0x04200780 0x40428204 0x20018a04 + 0x30070209 0xc4100780 0x30060205 0xc4100780 + 0x20000405 0x04004780 0x30020c09 0xc4100780 + 0x2101ec04 0x20018404 0x20008205 0x00000007 + 0xd00e0205 0x80c00780 0x10025003 0x00000780 + 0x1000f805 0x0403c780 0x30050a09 0xc4100782 + 0x2000041d 0x040187c0 0x00020e09 0xc0000780 + 0x08055201 0xe4204780 0xa0036003 0x00000000 + 0x10036003 0x00000280 0xa0004c05 0x04200780 + 0x4001d005 0x00218780 0x00075201 0xe4204780 + 0xd01d480d 0x20000780 0xd0115009 0x20000780 + 0x1100f004 0x1d00e008 0x2101ee0c 0x2942ec04 + 0x30030205 0xac000780 0x00075401 0xe4204780 + 0x1d00e004 0x2901e004 0x00075601 0xe4204780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xd01d5005 0x20000780 0x3409c1fd 0x6c20c7c8 - 0xa0032003 0x00000000 0x10031003 0x00000280 - 0xd01d5805 0x20000780 0x2400c001 0x04224780 - 0x30020001 0xc4100780 0xd011a005 0x20000780 - 0x2000ca01 0x04200780 0xd00e0005 0x80c00780 - 0x1400c001 0x0423c780 0x30000201 0xec000780 - 0x10032003 0x00000780 0x1000f801 0x0403c780 - 0x308213fd 0x6c4107ca 0x00021209 0xc0000780 - 0x08001201 0xe4200780 0xa0048003 0x00000000 - 0x10048003 0x00000280 0xd01d5005 0x20000780 - 0x2000d001 0x04224780 0x3400c1fd 0x6c20c7c8 - 0x00020005 0xc0000780 0xa0047003 0x00000000 - 0x10046003 0x00000280 0xd01d5809 0x20000780 - 0x2909e000 
0x2100f000 0x30020001 0xc4100780 - 0xd011a009 0x20000780 0x2000ca01 0x04200780 - 0xd00e0005 0x80c00780 0x1800c001 0x0423c780 - 0x30000201 0xec000780 0x10047003 0x00000780 - 0x1000f801 0x0403c780 0x04001201 0xe4200782 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xd01d4805 0x20000780 0xd011800d 0x20000780 - 0x1400c001 0x0423c780 0x00000e09 0xc0000780 - 0xd8114805 0x20000780 0x2c40c005 0x04200780 - 0x3408c1fd 0x6420c7c8 0xa0060003 0x00000000 - 0x1500e000 0x3501e004 0x1005f003 0x00000280 - 0xa0004e09 0x04200780 0xa000440d 0x04200780 - 0x40438408 0x20028c08 0x3007040d 0xc4100780 - 0x30060409 0xc4100780 0x20000609 0x04008780 - 0x3002100d 0xc4100780 0x2102ec08 0x20028608 - 0x20008409 0x00000007 0xd00e0409 0x80c00780 - 0x10060003 0x00000780 0x1000f809 0x0403c780 - 0x3001d005 0xac200782 0x00021205 0xc0000780 - 0x04055201 0xe4208780 0x00000e05 0xc0000780 - 0xd4117005 0x20000780 0x307c0205 0x8c000780 - 0x3483c1fd 0x6c6147c8 0x10000209 0x0403c780 - 0x10001009 0x0403c280 0x300203fd 0x6c00c7c8 - 0xa0099003 0x00000000 0x1000f831 0x0403c780 - 0x10098003 0x00000280 0x20000429 0x04000780 - 0x20098409 0x00000003 0x2009940d 0x00000003 - 0x00070c05 0xc0000780 0x0002040d 0xc0000780 - 0x00020611 0xc0000780 0x2000002d 0x04004780 - 0xd4154809 0x20000780 0x1900e20c 0x1900e610 - 0x1900e008 0x1900e400 0x4d43e214 0x4d44e610 - 0x1800ca0d 0x0423c780 0x6c02c009 0x80214780 - 0x6c00c411 0x80210780 0x1900e800 0x4d43ea0c - 0x20000409 0x04010780 0x6c00c801 0x8020c780 - 0x20008434 0x1900ee0c 0x1900ec10 0x1900f208 - 0x1900f014 0x1900f638 0x1900f400 0x4d43ee0c - 0x00000e09 0xc0000780 0xd8115809 0x20000780 - 0x6c04cc0d 0x8020c780 0x1900e010 0x4d42f208 - 0x20001a0d 0x0400c780 0x6c05d009 0x80208780 - 0x4d4ef614 0x20028608 0x6c00d401 0x80214780 - 0x20000401 0x04000780 0x30040001 0xec000780 - 0x2040c001 0x04200784 0x301f0009 0xec100780 - 0x30010001 0xc4100780 0xd0000401 0x04008780 - 0x20209429 0x00000003 0x30840001 0xac400780 - 0x300b15fd 0x6c0047c8 0x20001831 0x04000780 - 0xdc01000d 0x20000780 0xd0010011 
0x20000784 - 0x10074003 0x00000280 0x10099003 0x00000780 - 0x00070c05 0xc0000780 0x00021209 0xc0000782 - 0x08025201 0xe4230780 0xd809480d 0x20000780 - 0x1c00d001 0x0423c780 0x1c00f009 0x0423c780 - 0x2c00c001 0x04200780 0x2c00e009 0x04208780 - 0x20000001 0x04008780 0x08025201 0xe4200780 - 0x1d00e400 0x1d00ec08 0x2d00e000 0x2d02e808 - 0x20000001 0x04008780 0x08025201 0xe4200780 - 0x1d00e200 0x2d00e000 0x08025201 0xe4200780 - 0x00000e09 0xc0000780 0xd8117009 0x20000780 - 0x387cc1fd 0x6c2147c8 0xa00b3003 0x00000000 - 0x100b0003 0x00000280 0xd4094809 0x20000780 - 0x387cc1fd 0x6c2087c8 0x100b2003 0x00000280 - 0x10018001 0x00000003 0x100b3003 0x00000780 - 0x1000f801 0x0403c780 0x30851009 0x64410782 - 0xd4094805 0x20000780 0xa000040d 0x2c014780 - 0x30010211 0xec100780 0x20019009 0x00000003 - 0x407f860d 0x0007ffff 0x2440c011 0x04210780 - 0x60020205 0x8000c780 0x30080809 0xec000780 - 0x20000205 0x04008780 0x40030009 0x00000780 - 0x60020209 0x00008780 0x30100409 0xc4100780 - 0x60020001 0x00008780 0x00021209 0xc0000780 - 0x08025201 0xe4200780 0xd8094805 0x20000780 - 0x1500e800 0x1500f804 0x3400c001 0xac200780 - 0x3401d005 0xac200780 0x30010001 0xac000780 - 0x307c11fd 0x640147c8 0x08025201 0xe4200780 + 0xd01d5009 0x20000780 0x3807c1fd 0x6c20c7c8 + 0xa0046003 0x00000000 0x10046003 0x00000280 + 0xd01d5809 0x20000780 0x2800c005 0x0421c780 + 0x30020205 0xc4100780 0xd011a009 0x20000780 + 0x2000ca05 0x04204780 0xd00e0209 0x80c00780 + 0x1800c005 0x0423c780 0x30010405 0xec000780 + 0x00020e09 0xc0000780 0x08001201 0xe4204780 + 0xd01d5009 0x20000782 0x2000d005 0x0421c780 + 0x3801c1fd 0x6c20c7c8 0x00020209 0xc0000780 + 0xa0055003 0x00000000 0x10055003 0x00000280 + 0xd01d580d 0x20000780 0x2d07e004 0x2101f004 + 0x30020205 0xc4100780 0xd011a00d 0x20000780 + 0x2000ca05 0x04204780 0xd00e0209 0x80c00780 + 0x1c00c005 0x0423c780 0x30010405 0xec000780 + 0x08001201 0xe4204780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0xd01d5009 0x20000780 + 0x2840c005 0x04200780 0x3001d005 0xac200780 + 
0x307c0205 0x8c000780 0x300603fd 0x6c00c7c8 + 0xa0088003 0x00000000 0x1000f821 0x0403c780 + 0x10088003 0x00000280 0x20000025 0x04018780 + 0x20098c09 0x00000003 0x2009920d 0x00000003 + 0x20000029 0x04004780 0x0002040d 0xc0000780 + 0x00020611 0xc0000780 0x00070a09 0xc0000780 + 0xd8154809 0x20000780 0x1900e204 0x1900e60c + 0x1900e000 0x1900e408 0x4d41e204 0x4d43e60c + 0x6c00c001 0x80204780 0x6c02c405 0x8020c780 + 0x20018004 0x1900e800 0x6c00c805 0x80204780 + 0x1800ca01 0x0423c780 0x6c00ca31 0x80204780 + 0x1900ee2c 0x1900ec10 0x1900f004 0x1900f200 + 0x1900f60c 0x1900f408 0x4c0bce2d 0x00218780 + 0xd4115809 0x20000780 0x6c04cc2d 0x8022c780 + 0x1900e010 0x200b982c 0x6c01d005 0x8022c780 + 0x4c03d60d 0x00218780 0x6c00d201 0x80204780 + 0x6c02d405 0x8020c780 0x20000001 0x04004780 + 0x30040001 0xec000780 0x2040c001 0x04200784 + 0x301f0005 0xec100780 0x30010001 0xc4100780 + 0xd0000201 0x04008780 0x20209225 0x00000003 + 0x30820001 0xac400780 0x300a13fd 0x6c0047c8 + 0x20001021 0x04000780 0xdc01000d 0x20000780 + 0xd0010011 0x20000784 0x10065003 0x00000280 + 0x00020e05 0xc0000782 0x04025201 0xe4220780 + 0xd4094809 0x20000780 0x1800d001 0x0423c780 + 0x1800f005 0x0423c780 0x2800c001 0x04200780 + 0x2800e005 0x04204780 0x20000001 0x04004780 + 0x04025201 0xe4200780 0x1900e400 0x1900ec04 + 0x2900e000 0x2901e804 0x20000001 0x04004780 + 0x307c0dfd 0x640147c8 0x04025201 0xe4200780 0x30000003 0x00000280 0x10004401 0x0023c780 - 0x60004e01 0x00218780 0x00021209 0xc0000780 - 0xd8094805 0x20000780 0xa0004c09 0x04200780 - 0x3006000d 0xc4100780 0x1500e200 0x1500e604 - 0x20000409 0x0400c780 0x3400c00d 0xac200780 - 0x3401c405 0xac200780 0x30020401 0xc4100780 - 0x30010605 0xac000780 0x2000c801 0x04200780 - 0xd00e0005 0xa0c00781 + 0x60004e05 0x00214780 0x00020e05 0xc0000780 + 0xa0004c01 0x04200780 0x30060205 0xc4100780 + 0xd4094805 0x20000780 0x20018004 0x1500e200 + 0x30020205 0xc4100780 0x2500e008 0x2101e800 + 0xd00e0009 0xa0c00781 } } code { @@ -838,253 +809,196 @@ code { code { name = 
cudaEstimateResidual8 lmem = 0 - smem = 3748 + smem = 3760 reg = 14 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 28 + bytes = 12 mem { - 0x000003ff 0x0000000f 0x0000001f 0x00000020 - 0x00000001 0x007fffff 0x0000000e + 0x000003ff 0x0000000f 0x007fffff } } bincode { - 0xd0800209 0x00400780 0xa0000001 0x04000780 - 0xa0000421 0x04000780 0x308101fd 0x644107c8 - 0xa0012003 0x00000000 0x30061009 0xc4100780 - 0x10012003 0x00000280 0x10004409 0x0023c780 - 0x60024e05 0x00220780 0x3007020d 0xc4100780 - 0x30060205 0xc4100780 0x2000060d 0x04004780 - 0x30020005 0xc4100780 0x2103ec10 0x2001840c - 0x20000205 0x04010780 0xd00e0205 0x80c00780 - 0x00000605 0xc0000780 0x04045201 0xe4204780 + 0xd0800205 0x00400780 0xa0000019 0x04000780 + 0xa0000215 0x04000780 0x30810dfd 0x644107c8 + 0xa0013003 0x00000000 0x30060a01 0xc4100780 + 0x10013003 0x00000280 0xa0004e05 0x04200780 + 0xa0004409 0x04200780 0x40428204 0x20018a04 + 0x30070209 0xc4100780 0x30060205 0xc4100780 + 0x20000409 0x04004780 0x30020c05 0xc4100780 + 0x2102ec0c 0x20018008 0x20000205 0x0400c780 + 0xd00e0205 0x80c00780 0x00000405 0xc0000780 + 0x04045201 0xe4204780 0x00000005 0xc0000782 + 0xd4114809 0x20000780 0x3806c1fd 0x6420c7c8 + 0xa0025003 0x00000000 0x1800c001 0x0423c780 + 0x10024003 0x00000280 0xa0004e05 0x04200780 + 0xa0004409 0x04200780 0x40428204 0x20018a04 + 0x30070209 0xc4100780 0x30060205 0xc4100780 + 0x20000405 0x04004780 0x30020c09 0xc4100780 + 0x2101ec04 0x20018404 0x20008205 0x00000007 + 0xd00e0205 0x80c00780 0x10025003 0x00000780 + 0x1000f805 0x0403c780 0x30050a09 0xc4100782 + 0x2000041d 0x040187c0 0x00020e09 0xc0000780 + 0x08055201 0xe4204780 0xa0036003 0x00000000 + 0x10036003 0x00000280 0xa0004c05 0x04200780 + 0x4001d005 0x00218780 0x00075201 0xe4204780 + 0xd01d480d 0x20000780 0xd0115009 0x20000780 + 0x1100f004 0x1d00e008 0x2101ee0c 0x2942ec04 + 0x30030205 0xac000780 0x00075401 0xe4204780 + 0x1d00e004 0x2901e004 0x00075601 0xe4204780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xa0004c15 
0x04200780 0x1000d005 0x0423c780 - 0x400a060d 0x00000780 0x3010060d 0xc4100780 - 0x600a040d 0x0000c780 0x1000d005 0x0423c780 - 0xd0118005 0x20000780 0x2101ee04 0x2543e010 - 0x30051019 0xc4100780 0x30040211 0xac000780 - 0x20000c25 0x04000780 0x300909fd 0x6c00c7c8 - 0xa002b003 0x00000000 0x1002a003 0x00000280 - 0xd0115005 0x20000780 0x2503e004 0x20019204 - 0x30020205 0xc4100780 0x2000ca05 0x04204780 - 0xd00e0219 0x80c00780 0x1400d405 0x0423c780 - 0x30010c05 0xec000780 0x1002b003 0x00000780 - 0x1000f805 0x0403c780 0x308213fd 0x6c4107ca - 0x0002120d 0xc0000780 0x0c001201 0xe4204780 - 0xa0040003 0x00000000 0x10040003 0x00000280 - 0x2000d005 0x04224780 0x300403fd 0x6c0187c8 - 0x00020205 0xc0000780 0xa003f003 0x00000000 - 0x1003e003 0x00000280 0xd0115009 0x20000780 - 0x2109f010 0x2903e004 0x20000205 0x04010780 - 0x30020205 0xc4100780 0x2000ca05 0x04204780 - 0xd00e0211 0x80c00780 0x1800d405 0x0423c780 - 0x30010805 0xec000780 0x1003f003 0x00000780 - 0x1000f805 0x0403c780 0x04001201 0xe4204782 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x00000409 0xc0000780 0x0002120d 0xc0000780 - 0x0c025201 0xe43f0780 0xd8114805 0x20000780 - 0x3400c1fd 0x6420c7c8 0xa0054003 0x00000000 - 0x10053003 0x00000280 0x10004409 0x0023c780 - 0x60024e05 0x00220780 0x30070209 0xc4100780 - 0x30060205 0xc4100780 0x20000409 0x04004780 - 0x30020005 0xc4100780 0x2102ec08 0x20028204 - 0x20008205 0x00000007 0xd00e0205 0x80c00780 - 0x10054003 0x00000780 0x1000f805 0x0403c780 - 0x0002120d 0xc0000782 0x0c055201 0xe4204780 - 0xd0118005 0x20000780 0xd811480d 0x20000780 - 0x2440c009 0x0420c780 0x3c84cbfd 0x6c6147c8 - 0x60864405 0x00600780 0x3c00c011 0x04208780 - 0x1000020d 0x0403c500 0x1000000d 0x0403c280 - 0x300107fd 0x640187c8 0xa008f003 0x00000000 - 0x10000609 0x0403c780 0x1000f829 0x0403c780 - 0x1008c003 0x00000280 0x20098619 0x00000003 - 0x3004d00d 0xac200780 0x00071005 0xc0000780 - 0x00020c0d 0xc0000780 0x307c0619 0x8c000780 - 0xd4154811 0x20000780 0x1000c21d 0x0423c784 - 0x1000c62d 0x0423c784 0x1000c011 
0x0423c784 - 0x1000c40d 0x0423c784 0x4d47e21c 0x4d4be62c - 0x6c04c031 0x8021c780 0x6c03c435 0x8022c780 - 0x1000ca2d 0x0423c784 0x1000c81d 0x0423c784 - 0x1000ce11 0x0423c784 0x1000cc0d 0x0423c784 - 0x4c0bca2d 0x00218780 0xd8114811 0x20000780 - 0x20001831 0x04034780 0x6c07c835 0x8022c780 - 0x2000c02d 0x04208784 0x1000c41d 0x0423c784 - 0x4d44ee10 0x200d9830 0x6c03cc0d 0x80210780 - 0x2000180d 0x0400c780 0x00021611 0xc0000780 - 0x3007060d 0xec000780 0x2040d20d 0x0420c784 - 0x301f0611 0xec100780 0x3001061d 0xc4100780 - 0x30020c0d 0x6c010780 0xd0070811 0x04008780 - 0x20208409 0x00000003 0xa000060d 0x2c014780 - 0x30850811 0xac400780 0x300105fd 0x640047c8 - 0x60040629 0x80028780 0xdc01000d 0x20000780 - 0x10068003 0x00000280 0x1008f003 0x00000780 - 0x3004d005 0xac200780 0x00071005 0xc0000780 - 0x307c0219 0x8c000780 0x0002120d 0xc0000782 - 0x0c025201 0xe4228780 0xdc094811 0x20000780 - 0x1000d005 0x0423c784 0x1000f009 0x0423c784 - 0x2000c005 0x04204784 0x2000e009 0x04208784 - 0x20000205 0x04008780 0x0c025201 0xe4204780 - 0x1000c405 0x0423c784 0x1000cc09 0x0423c784 - 0x2000c005 0x04204784 0x2000c809 0x04208784 - 0x20000205 0x04008780 0x0c025201 0xe4204780 - 0x1000c205 0x0423c784 0x2000c005 0x04204784 - 0x0c025201 0xe4204780 0xd8117009 0x20000780 - 0x387cc1fd 0x6c2147c8 0xa00ab003 0x00000000 - 0x100a8003 0x00000280 0xd4094809 0x20000780 - 0x387cc1fd 0x6c2087c8 0x100aa003 0x00000280 - 0x10018005 0x00000003 0x100ab003 0x00000780 - 0x1000f805 0x0403c780 0xf0000001 0xe0000002 - 0x20018009 0x00000003 0x4005180d 0x00000780 - 0x60041a0d 0x0000c780 0x3086001d 0x64410780 - 0xd4094805 0x20000780 0x30100611 0xc4100780 - 0xa0000e1d 0x2c014780 0x30010c0d 0xec100780 - 0x60041811 0x00010780 0x407f8e19 0x0007ffff - 0x2543e008 0x2006880c 0x30000409 0xec000780 - 0x20000409 0x0400c780 0x4005040d 0x00000780 - 0x6004060d 0x0000c780 0x3010060d 0xc4100780 - 0x60040405 0x0000c780 0x00021209 0xc0000780 - 0x08025201 0xe4204780 0xd8094805 0x20000780 - 0x1500e804 0x1500f808 0x3401c005 0xac200780 - 
0x3402d009 0xac200780 0x30020205 0xac000780 - 0x307c01fd 0x640147c8 0x08025201 0xe4204780 + 0xd01d5009 0x20000780 0x3807c1fd 0x6c20c7c8 + 0xa0046003 0x00000000 0x10046003 0x00000280 + 0xd01d5809 0x20000780 0x2800c005 0x0421c780 + 0x30020205 0xc4100780 0xd011a009 0x20000780 + 0x2000ca05 0x04204780 0xd00e0209 0x80c00780 + 0x1800c005 0x0423c780 0x30010405 0xec000780 + 0x00020e09 0xc0000780 0x08001201 0xe4204780 + 0xd01d5009 0x20000782 0x2000d005 0x0421c780 + 0x3801c1fd 0x6c20c7c8 0x00020209 0xc0000780 + 0xa0055003 0x00000000 0x10055003 0x00000280 + 0xd01d580d 0x20000780 0x2d07e004 0x2101f004 + 0x30020205 0xc4100780 0xd011a00d 0x20000780 + 0x2000ca05 0x04204780 0xd00e0209 0x80c00780 + 0x1c00c005 0x0423c780 0x30010405 0xec000780 + 0x08001201 0xe4204780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0xd01d5009 0x20000780 + 0x2840c005 0x04200780 0x3001d005 0xac200780 + 0x307c0205 0x8c000780 0x300603fd 0x6c00c7c8 + 0xa0082003 0x00000000 0x1000f821 0x0403c780 + 0x10082003 0x00000280 0x20000025 0x04018780 + 0x20098c09 0x00000003 0x2009920d 0x00000003 + 0x20000029 0x04004780 0x0002040d 0xc0000780 + 0x00020611 0xc0000780 0x00070a09 0xc0000780 + 0xd8154809 0x20000780 0x1900e204 0x1900e000 + 0x4c01c205 0x00218780 0x6c00c031 0x80204780 + 0x1900e62c 0x1900e410 0x1900ea04 0x1900e800 + 0x1900ee0c 0x1900ec08 0x4c0bc62d 0x00218780 + 0xd4115809 0x20000780 0x6c04c42d 0x8022c780 + 0x1900e010 0x4d41ea34 0x20001805 0x0402c780 + 0x6c00c801 0x80234780 0x4d43ee0c 0x20008200 + 0x6c02cc05 0x8020c780 0x20000001 0x04004780 + 0x30040001 0xec000780 0x2040c001 0x04200784 + 0x301f0005 0xec100780 0x30010001 0xc4100780 + 0xd0000201 0x04008780 0x20209225 0x00000003 + 0x30820001 0xac400780 0x300a13fd 0x6c0047c8 + 0x20001021 0x04000780 0xdc01000d 0x20000780 + 0xd0010011 0x20000784 0x10065003 0x00000280 + 0x00020e05 0xc0000782 0x04025201 0xe4220780 + 0xd4094809 0x20000780 0x1800d001 0x0423c780 + 0x1800f005 0x0423c780 0x2800c001 0x04200780 + 0x2800e005 0x04204780 0x20000001 0x04004780 + 0x04025201 0xe4200780 
0x1900e400 0x1900ec04 + 0x2900e000 0x2901e804 0x20000001 0x04004780 + 0x307c0dfd 0x640147c8 0x04025201 0xe4200780 0x30000003 0x00000280 0x10004401 0x0023c780 - 0x60004e01 0x00220780 0x0002120d 0xc0000780 - 0xdc094805 0x20000780 0x30060009 0xc4100780 - 0x1500e200 0x1500e604 0x20000a09 0x04008780 - 0x3400c00d 0xac200780 0x3401c405 0xac200780 - 0x30020401 0xc4100780 0x30010605 0xac000780 - 0x2000c801 0x04200780 0xd00e0005 0xa0c00781 + 0x60004e05 0x00214780 0x00020e05 0xc0000780 + 0xa0004c01 0x04200780 0x30060205 0xc4100780 + 0xd4094805 0x20000780 0x20018004 0x1500e200 + 0x30020205 0xc4100780 0x2500e008 0x2101e800 + 0xd00e0009 0xa0c00781 } } code { name = cudaEstimateResidual lmem = 0 smem = 3748 - reg = 13 + reg = 12 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 24 + bytes = 20 mem { 0x000003ff 0x0000000f 0x0000001f 0x00000001 - 0x007fffff 0x0000000e + 0x007fffff } } bincode { 0xd0800205 0x00400780 0xa0000019 0x04000780 - 0xa0000215 0x04000780 0x30810dfd 0x644107c8 + 0xa0000211 0x04000780 0x30810dfd 0x644107c8 0xa0015003 0x00000000 0xa0004405 0x04200780 - 0x30060a11 0xc4100780 0x10015003 0x00000280 + 0x30060815 0xc4100780 0x10015003 0x00000280 0x40034e01 0x00200780 0x30100001 0xc4100780 - 0x60024e01 0x00200780 0x20000001 0x04014780 + 0x60024e01 0x00200780 0x20000001 0x04010780 0x30070009 0xc4100780 0x30060001 0xc4100780 - 0x20000401 0x04000780 0x30020c0d 0xc4100780 - 0x2100ec00 0x20038808 0x20000601 0x04000780 + 0x20000409 0x04000780 0x30020c01 0xc4100780 + 0x2102ec0c 0x20008a08 0x20000001 0x0400c780 0xd00e0001 0x80c00780 0x00000405 0xc0000780 0x04045201 0xe4200780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xa0004c09 0x04200780 0x1000d001 0x0423c780 0x4004020d 0x00000780 0x3010060d 0xc4100780 0x6004000d 0x0000c780 0x1000d001 0x0423c780 0xd0118005 0x20000780 - 0x2100ee20 0x2543e01c 0x30050a01 0xc4100780 - 0x3007101d 0xac000780 0x20000021 0x04018780 + 0x2100ee00 0x2543e01c 0x30050821 0xc4100780 + 0x3007001d 0xac000780 0x20001021 0x04018780 0x30080ffd 
0x6c00c7c8 0xa002e003 0x00000000 0x1002d003 0x00000280 0xd0115005 0x20000780 0x2503e000 0x20009000 0x30020001 0xc4100780 0x2000ca01 0x04200780 0xd00e0025 0x80c00780 0x1400d401 0x0423c780 0x30001201 0xec000780 0x1002e003 0x00000780 0x1000f801 0x0403c780 - 0x00021009 0xc0000782 0x308211fd 0x6c4107c8 - 0x08001201 0xe4200780 0xa0043003 0x00000000 + 0x00021005 0xc0000782 0x308211fd 0x6c4107c8 + 0x04001201 0xe4200780 0xa0043003 0x00000000 0x10043003 0x00000280 0x2000d001 0x04220780 - 0x300701fd 0x6c0187c8 0x00020005 0xc0000780 + 0x300701fd 0x6c0187c8 0x00020009 0xc0000780 0xa0042003 0x00000000 0x10041003 0x00000280 - 0xd011500d 0x20000780 0x2108f000 0x2d03e020 - 0x20001001 0x04000780 0x30020001 0xc4100780 - 0x2000ca01 0x04200780 0xd00e0021 0x80c00780 - 0x1c00d401 0x0423c780 0x30001001 0xec000780 + 0xd011500d 0x20000780 0x2108f020 0x2d03e000 + 0x20000001 0x04020780 0x30020001 0xc4100780 + 0x2000ca01 0x04200780 0xd00e000d 0x80c00780 + 0x1c00d401 0x0423c780 0x30000601 0xec000780 0x10042003 0x00000780 0x1000f801 0x0403c780 - 0x04001201 0xe4200782 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x08025201 0xe43f0780 - 0x00000805 0xc0000780 0xd4114805 0x20000780 - 0x3406c1fd 0x6420c7c8 0xa0058003 0x00000000 + 0x08001201 0xe4200782 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0x04025201 0xe43f0780 + 0x00000a09 0xc0000780 0xd8114809 0x20000780 + 0x3806c1fd 0x6420c7c8 0xa0058003 0x00000000 0x10057003 0x00000280 0x40034e01 0x00200780 0x30100001 0xc4100780 0x60024e01 0x00200780 - 0x20000001 0x04014780 0x30070021 0xc4100780 - 0x30060001 0xc4100780 0x20001001 0x04000780 - 0x30020c21 0xc4100780 0x2100ec00 0x20009000 + 0x20000001 0x04010780 0x3007000d 0xc4100780 + 0x30060001 0xc4100780 0x2000060d 0x04000780 + 0x30020c01 0xc4100780 0x2103ec0c 0x20038000 0x20008001 0x00000007 0xd00e0001 0x80c00780 0x10058003 0x00000780 0x1000f801 0x0403c780 - 0x08055201 0xe4200782 0x00000805 0xc0000780 - 0xd411480d 0x20000780 0x3c83ca01 0x6c608780 - 0xd0118005 0x20000780 0xd000022d 0x04000780 - 0x2440c001 
0x0420c780 0x300117fd 0x640187c8 - 0xa0093003 0x00000000 0x3c00c00d 0x04200780 - 0x1008f003 0x00000280 0x30070a01 0xc4100780 - 0x20248001 0x00000003 0x00000005 0xc0000780 - 0x04000031 0x40000780 0x00000805 0xc0000780 - 0xd4114805 0x20000780 0x30051601 0xc4100780 - 0x3500e021 0x00000003 0x20000c01 0x04000780 - 0x307c11fd 0x6c0187c8 0xa007b003 0x00000000 - 0x1000f825 0x0403c780 0x2400c021 0x04200780 - 0x1007b003 0x00000280 0x20098029 0x00000003 - 0x0002140d 0xc0000780 0x00001805 0xc0000780 - 0xd4150011 0x20000780 0x1000c029 0x0423c784 - 0x20018001 0x00000003 0x6e0ac225 0x80224780 - 0x300801fd 0x6c0147c8 0xd4000805 0x20000780 - 0x10074003 0x00000280 0x00000805 0xc0000782 - 0xd4115805 0x20000780 0x1400c001 0x0423c780 - 0x30001201 0xec000780 0x00021005 0xc0000780 - 0x2440d225 0x04200780 0x301f1201 0xec100780 - 0x30011225 0xc4100780 0x30080e21 0x6c010780 - 0xd8094805 0x20000780 0xd0090025 0x04008780 - 0xa0001021 0x2c014780 0x1400c001 0x0423c780 - 0x30841225 0xac400780 0x2001962d 0x00000003 - 0x60091001 0x80000780 0x300117fd 0x640147c8 - 0x08025201 0xe4200780 0x10067003 0x00000280 - 0x10093003 0x00000780 0x30070a01 0xc4100780 - 0x20248001 0x00000003 0x00000005 0xc0000780 - 0x04000031 0x40000780 0xd8094805 0x20000782 - 0x1400d01d 0x0423c780 0x1400f001 0x0423c780 - 0x2400c01d 0x0421c780 0x2400e001 0x04200780 - 0x20000e01 0x04000780 0x08025201 0xe4200780 - 0x1500e41c 0x1500ec00 0x2507e01c 0x2500e800 - 0x20000e01 0x04000780 0x08025201 0xe4200780 - 0x1500e200 0x2500e000 0x08025201 0xe4200780 - 0x00000805 0xc0000780 0xd4117005 0x20000780 - 0x347cc1fd 0x6c2147c8 0xa00ac003 0x00000000 - 0x100a9003 0x00000280 0x00001805 0xc0000780 - 0xd4090005 0x20000780 0x347cc1fd 0x6c2087c8 - 0x100ab003 0x00000280 0x10018001 0x00000003 - 0x100ac003 0x00000780 0x1000f801 0x0403c780 - 0x3003d00d 0xac200782 0x20018c11 0x00000003 - 0x307c060d 0x8c000780 0x40090c1d 0x00000780 - 0x60080e21 0x0001c780 0x30850c1d 0x64410780 - 0x00001805 0xc0000780 0xd4090005 0x20000780 - 0x30101021 0xc4100780 0xa0000e1d 
0x2c014780 - 0x30010625 0xec100780 0x60080c11 0x00020780 - 0x407f8e0d 0x0007ffff 0x2549e01c 0x2003880c - 0x30060e11 0xec000780 0x2000080d 0x0400c780 - 0x40070011 0x00000780 0x60060211 0x00010780 - 0x30100811 0xc4100780 0x60060001 0x00010780 - 0x08025201 0xe4200780 0xd8094805 0x20000780 - 0x1500e80c 0x1500f800 0x3403c00d 0xac200780 - 0x3400d001 0xac200780 0x30000601 0xac000780 - 0x307c0dfd 0x640147c8 0x08025201 0xe4200780 - 0x30000003 0x00000280 0x40034e01 0x00200780 - 0x30100001 0xc4100780 0x60024e01 0x00200780 - 0x20000001 0x04014780 0xd8094805 0x20000780 - 0x3006000d 0xc4100780 0x1500e204 0x1500e600 - 0x20000409 0x0400c780 0x3401c005 0xac200780 - 0x3400c401 0xac200780 0x30020409 0xc4100780 - 0x30000201 0xac000780 0x2000c805 0x04208780 - 0xd00e0201 0xa0c00781 + 0x04055201 0xe4200782 0x00000a09 0xc0000780 + 0xd8117009 0x20000780 0x3883c001 0x6c608780 + 0xd0000221 0x04000780 0x300111fd 0x640187c8 + 0xa0089003 0x00000000 0x10089003 0x00000280 + 0x00000a09 0xc0000780 0xd8114809 0x20000780 + 0x30051001 0xc4100780 0x3900e00d 0x00000003 + 0x20000c01 0x04000780 0x307c07fd 0x6c0187c8 + 0xa0076003 0x00000000 0x1000f829 0x0403c780 + 0x2800c025 0x04200780 0x10076003 0x00000280 + 0x3007082d 0xc4100780 0x2009800d 0x00000003 + 0x2024962d 0x00000003 0x00020609 0xc0000780 + 0x0000160d 0xc0000780 0xdc150011 0x20000780 + 0x1000c00d 0x0423c784 0x20018001 0x00000003 + 0x6a03c229 0x80228780 0x300901fd 0x6c0147c8 + 0xdc00080d 0x20000780 0x1006f003 0x00000280 + 0x00000a09 0xc0000782 0xd8115809 0x20000780 + 0x1800c001 0x0423c780 0x30001401 0xec000780 + 0x00021209 0xc0000780 0x2840d20d 0x04200780 + 0x301f0601 0xec100780 0x30010629 0xc4100780 + 0x30090e0d 0x6c010780 0xd4094809 0x20000780 + 0xd00a0025 0x04008780 0xa000060d 0x2c014780 + 0x1800c001 0x0423c780 0x30841225 0xac400780 + 0x20019021 0x00000003 0x60090601 0x80000780 + 0x300111fd 0x640147c8 0x04025201 0xe4200780 + 0x10060003 0x00000280 0xd4094809 0x20000782 + 0x1800d001 0x0423c780 0x1800f00d 0x0423c780 + 0x2800c001 0x04200780 
0x2800e00d 0x0420c780 + 0x20000001 0x0400c780 0x04025201 0xe4200780 + 0x1900e400 0x1900ec0c 0x2900e000 0x2903e80c + 0x20000001 0x0400c780 0x307c0dfd 0x640147c8 + 0x04025201 0xe4200780 0x30000003 0x00000280 + 0x40034e01 0x00200780 0x30100001 0xc4100780 + 0x60024e01 0x00200780 0x20000001 0x04010780 + 0x30060001 0xc4100780 0xd4094805 0x20000780 + 0x20008404 0x1500e200 0x30020205 0xc4100780 + 0x2500e008 0x2101e800 0xd00e0009 0xa0c00781 } } code { @@ -1187,158 +1101,173 @@ code { code { name = cudaChooseBestMethod lmem = 0 - smem = 4128 - reg = 13 + smem = 4132 + reg = 14 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 28 + bytes = 48 mem { - 0x000003ff 0x00000008 0x00000001 0x00000020 - 0x0000007f 0x0000003f 0x0000001f + 0x000003ff 0x7e800000 0x3f000000 0x0000000e + 0x00000008 0x00000001 0x00000020 0x0000007f + 0x0000003f 0x0000001f 0x00000024 0x3e800000 } } bincode { 0xd0800205 0x00400780 0xa000020d 0x04000780 0xa0000005 0x04000780 0x30050601 0xc4100780 0x20000211 0x04000780 0x103f8001 0x07ffffff - 0x00020805 0xc0000780 0x307ccffd 0x6c20c7c8 - 0x04011001 0xe4200780 0x00070609 0xc0000780 - 0x10096003 0x00000280 0xa0004415 0x04200780 + 0x00020805 0xc0000780 0x307cd1fd 0x6c20c7c8 + 0x04011201 0xe4200780 0x00070609 0xc0000780 + 0x100b3003 0x00000280 0xa0004415 0x04200780 0x1000f819 0x0403c780 0x20000c1d 0x0400c780 - 0x3007cffd 0x6420c7c8 0xa0093003 0x00000000 - 0x10093003 0x00000280 0x1000ce01 0x0423c780 + 0x3007d1fd 0x6420c7c8 0xa00b0003 0x00000000 + 0x100b0003 0x00000280 0x1000d001 0x0423c780 0x40014e09 0x00200780 0x30100409 0xc4100780 - 0x60004e21 0x00208780 0x30070601 0xc4100780 + 0x60004e09 0x00208780 0x30070601 0xc4100780 0x30070c2d 0xc4100780 0x30060c31 0xc4100780 - 0x30060629 0xc4100780 0x30071025 0xc4100780 - 0x30061009 0xc4100780 0x200c962c 0x200a8000 - 0x20029224 0x210be828 0x30020209 0xc4100780 - 0x200a802c 0x20098428 0x20028024 0x200b9408 - 0xd00e0409 0x80c00780 0x00000005 0xc0000780 - 0x0000120d 0xc0000780 0x307ccdfd 0x6c20c7c8 - 0x0c051001 
0xe4208780 0x1000f809 0x0403c780 - 0x10038003 0x00000280 0xa0037003 0x00000000 - 0x10008200 0x2101ec24 0xa0004229 0x04200780 - 0x3000cdfd 0x6420c7c8 0xa0034003 0x00000000 - 0x10034003 0x00000280 0x20000e2d 0x04020780 - 0x3006162d 0xc4100780 0x2000002d 0x0402c780 - 0x3002162d 0xc4100780 0x2000ca2d 0x0422c780 - 0xd00e162d 0x80c00780 0x20001609 0x04008780 - 0x20000001 0x04028782 0x300901fd 0x6c0047c8 - 0x1002a003 0x00000280 0xf0000001 0xe0000002 - 0x0002080d 0xc0000780 0x0c031001 0xe4208780 - 0xdc0c4011 0x20000780 0x1000e001 0x0423c784 - 0x2000c001 0x04200784 0x0c031001 0xe4200780 - 0x1000d001 0x0423c784 0x2000c001 0x04200784 - 0x0c031001 0xe4200780 0x1000c801 0x0423c784 - 0x2000c001 0x04200784 0x0c031001 0xe4200780 - 0x1000c401 0x0423c784 0x2000c001 0x04200784 - 0x0c031001 0xe4200780 0x1000c201 0x0423c784 - 0x2000c001 0x04200784 0x307c03fd 0x640147c8 - 0x0c031001 0xe4200780 0x10093003 0x00000280 - 0xd414680d 0x20000780 0x1d00ec08 0x1d00e400 - 0x2c40c209 0x04208780 0x40050021 0x00000780 - 0x60040221 0x00020780 0x30101021 0xc4100780 - 0x3c81c1fd 0x6c6147c8 0x60040021 0x00020780 - 0xa0090003 0x00000000 0x10066003 0x00000280 - 0xd4144005 0x20000780 0x1400c001 0x0423c780 - 0x3002cc25 0xc4300780 0x4005002d 0x00000780 - 0x301f1229 0xec100780 0x6004022d 0x0002c780 - 0xd0821429 0x04400780 0x3010162d 0xc4100780 - 0x20001425 0x04024780 0x60040009 0x0002c780 - 0x30011201 0xec100780 0x20000401 0x04000780 - 0xd80c4005 0x20000780 0x2400c001 0x04200780 - 0x20068001 0x00000003 0x10090003 0x00000780 - 0xd414680d 0x20000780 0x3c83c1fd 0x6c6147c8 - 0xa008f003 0x00000000 0x1007a003 0x00000280 - 0xd4144005 0x20000780 0x2502e608 0x1500e000 - 0x3002cc25 0xc4300780 0x40050029 0x00000780 - 0x301f122d 0xec100780 0x60040229 0x00028780 - 0xd082162d 0x04400780 0x30101429 0xc4100780 - 0x20001625 0x04024780 0x60040001 0x00028780 - 0x30011209 0xec100780 0x20000001 0x04008780 - 0xd80c4005 0x20000780 0x2400c001 0x04200780 - 0x200f8001 0x00000003 0x1008f003 0x00000780 - 0xd414680d 0x20000780 0x3c7cc1fd 
0x6c2147c8 - 0xa008e003 0x00000000 0x10088003 0x00000280 - 0xd80c400d 0x20000780 0xd4147805 0x20000780 - 0x3c7cc1fd 0x6c2087c8 0x2501e001 0x00000003 - 0x10000401 0x2440c280 0x40050025 0x00000780 - 0x60040225 0x00024780 0x30101225 0xc4100780 - 0x60040001 0x00024780 0x1008e003 0x00000780 - 0xd4147805 0x20000780 0x1400c001 0x0423c780 + 0x30060629 0xc4100780 0x30070425 0xc4100780 + 0x30060421 0xc4100780 0x200c962c 0x200a8000 + 0x20089224 0x210be828 0x30020221 0xc4100780 + 0x200a802c 0x20099028 0x20088024 0x200b9420 + 0xd00e1021 0x80c00780 0x00000005 0xc0000780 + 0x0000120d 0xc0000780 0x0c051201 0xe4220780 + 0x3001cffd 0x6c20c7c8 0xa0055003 0x00000000 + 0x10000201 0x0403c780 0x1000f821 0x0403c780 + 0x10055003 0x00000280 0x30060c25 0xc4100780 + 0x30060629 0xc4100780 0x20001225 0x04028780 + 0x30060409 0xc4100780 0x20000425 0x04024780 + 0xa0004229 0x04200780 0xd414480d 0x20000780 + 0x1c00ce09 0x0423c780 0x6000cc09 0x88208780 + 0x3c7ccbfd 0x6c2147c8 0x3d02e02c 0x20009208 + 0x30020409 0xc4100780 0x2000ca09 0x04208780 + 0xd00e0409 0x80c00780 0xa003e003 0x00000000 + 0x1003b003 0x00000280 0x307c05fd 0x6c0087c8 + 0x1003d003 0x00000280 0x10018031 0x00000003 + 0x1003e003 0x00000780 0x1000f831 0x0403c780 + 0x300bcc2d 0xac200782 0x307c162d 0x8c000780 + 0x400c162d 0x00018780 0xa0001631 0x44014780 + 0xb03d1831 0x0358637b 0xa0000435 0x44014780 + 0xb08119fd 0x605107c8 0xb03d1a35 0x0358637b + 0xc08b1a35 0x00400680 0xc08b1831 0x00400680 + 0x90001831 0x00000780 0xe10c1a31 0x00408780 + 0x90001831 0x60000780 0xa0001831 0x8c064780 + 0x30831831 0xac400780 0x307c1831 0x8c000780 + 0x300c0409 0xec000780 0x20019831 0x00000003 + 0x20000001 0x04028780 0x600c1609 0x80008780 + 0x3000cffd 0x6c2107c8 0x20001021 0x04008780 + 0x1002f003 0x00000280 0x0002080d 0xc0000782 + 0x0c031201 0xe4220780 0xdc0c4811 0x20000780 + 0x1000e001 0x0423c784 0x2000c001 0x04200784 + 0x0c031201 0xe4200780 0x1000d001 0x0423c784 + 0x2000c001 0x04200784 0x0c031201 0xe4200780 + 0x1000c801 0x0423c784 0x2000c001 0x04200784 + 
0x0c031201 0xe4200780 0x1000c401 0x0423c784 + 0x2000c001 0x04200784 0x0c031201 0xe4200780 + 0x1000c201 0x0423c784 0x2000c001 0x04200784 + 0x307c03fd 0x640147c8 0x0c031201 0xe4200780 + 0x100b0003 0x00000280 0xd414700d 0x20000780 + 0x1d00ec08 0x1d00e400 0x2c40c209 0x04208780 + 0x40050021 0x00000780 0x60040221 0x00020780 + 0x30101021 0xc4100780 0x3c84c1fd 0x6c6147c8 + 0x60040021 0x00020780 0xa00ad003 0x00000000 + 0x10083003 0x00000280 0xd4144805 0x20000780 + 0x1400c001 0x0423c780 0x3002ce25 0xc4300780 + 0x4005002d 0x00000780 0x301f1229 0xec100780 + 0x6004022d 0x0002c780 0xd0851429 0x04400780 + 0x3010162d 0xc4100780 0x20001425 0x04024780 + 0x60040009 0x0002c780 0x30011201 0xec100780 + 0x20000401 0x04000780 0xd80c4805 0x20000780 + 0x2400c001 0x04200780 0x20068001 0x00000003 + 0x100ad003 0x00000780 0xd414700d 0x20000780 + 0x3c86c1fd 0x6c6147c8 0xa00ac003 0x00000000 + 0x10097003 0x00000280 0xd4144805 0x20000780 + 0x2502e608 0x1500e000 0x3002ce25 0xc4300780 + 0x40050029 0x00000780 0x301f122d 0xec100780 + 0x60040229 0x00028780 0xd085162d 0x04400780 + 0x30101429 0xc4100780 0x20001625 0x04024780 + 0x60040001 0x00028780 0x30011209 0xec100780 + 0x20000001 0x04008780 0xd80c4805 0x20000780 + 0x2400c001 0x04200780 0x200f8001 0x00000003 + 0x100ac003 0x00000780 0xd414700d 0x20000780 + 0x3c7cc1fd 0x6c2147c8 0xa00ab003 0x00000000 + 0x100a5003 0x00000280 0xd80c480d 0x20000780 + 0xd4148005 0x20000780 0x3c7cc1fd 0x6c2087c8 + 0x2501e001 0x00000003 0x10000a01 0x2440c280 0x40050025 0x00000780 0x60040225 0x00024780 0x30101225 0xc4100780 0x60040001 0x00024780 - 0xf0000001 0xe0000002 0xf0000001 0xe0000002 - 0x30080001 0xac000782 0x00020e05 0xc0000780 - 0x04011001 0xe4200780 0x20000c19 0x04014782 - 0x3006cffd 0x6c2107c8 0x1000d003 0x00000280 - 0x861ffe03 0x00000000 0x3004cffd 0x6c20c7c8 - 0xa00a7003 0x00000000 0x100a7003 0x00000280 - 0x1000ce01 0x0423c780 0x40014e05 0x00200780 - 0x30100205 0xc4100780 0x60004e01 0x00204780 - 0x20000001 0x04010780 0x30070005 0xc4100780 - 0x30060001 0xc4100780 
0x00020805 0xc0000780 - 0x20000201 0x04000780 0xd4044005 0x20000780 - 0x2100e804 0x1500e000 0x20108205 0x00000003 - 0xd00e0201 0xa0c00780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x00020805 0xc0000780 - 0xd4044005 0x20000780 0x308409fd 0x6c4107c8 - 0xa00ba003 0x00000000 0x1400c001 0x0423c780 - 0x100ba003 0x00000280 0x00020805 0xc0000780 - 0xd408400d 0x20000780 0xd4044009 0x20000780 - 0x1c00c001 0x0423c780 0x3800c1fd 0x6c2107c8 - 0x1c00c001 0x0423c780 0x20008805 0x0000000b - 0x3800c001 0xac200780 0x10000805 0x0403c500 - 0x04001001 0xe4204780 0x04011001 0xe4200780 + 0x100ab003 0x00000780 0xd4148005 0x20000780 + 0x1400c001 0x0423c780 0x40050025 0x00000780 + 0x60040225 0x00024780 0x30101225 0xc4100780 + 0x60040001 0x00024780 0xf0000001 0xe0000002 + 0xf0000001 0xe0000002 0x30080001 0xac000782 + 0x00020e05 0xc0000780 0x04011201 0xe4200780 + 0x20000c19 0x04014782 0x3006d1fd 0x6c2107c8 + 0x1000d003 0x00000280 0x861ffe03 0x00000000 + 0x3004d1fd 0x6c20c7c8 0xa00c4003 0x00000000 + 0x100c4003 0x00000280 0x1000d001 0x0423c780 + 0x40014e05 0x00200780 0x30100205 0xc4100780 + 0x60004e01 0x00204780 0x20000001 0x04010780 + 0x30070005 0xc4100780 0x30060001 0xc4100780 + 0x00020805 0xc0000780 0x20000201 0x04000780 + 0xd4044805 0x20000780 0x2100e804 0x1500e000 + 0x20108205 0x00000003 0xd00e0201 0xa0c00780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x308509fd 0x6c4107c8 0xa00c9003 0x00000000 - 0x100c9003 0x00000280 0x00020805 0xc0000780 - 0xd4064009 0x20000780 0x20008805 0x00000007 - 0x3800c1fd 0x6c2047c8 0x10000805 0x0403c500 - 0x0002020d 0xc0000780 0x3800c001 0xac200780 - 0x1c00d005 0x0423c780 0x04001001 0xe4204780 - 0x04011001 0xe4200780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x308609fd 0x6c4107c8 - 0x30000003 0x00000280 0x20288805 0x0000000b - 0x00020209 0xc0000780 0x20208805 0x00000003 - 0x3800c1fd 0x6c2047c8 0x10000805 0x0403c500 - 0x0002020d 0xc0000780 0x00020805 0xc0000780 - 0x3800c005 0xac200780 0x1c00d001 0x0423c780 - 0x20188809 0x0000000b 0x04001001 0xe4200780 - 
0x00020409 0xc0000780 0x04011001 0xe4204780 - 0x20108801 0x00000003 0x3801c1fd 0x6c2047c8 - 0x10000801 0x0403c500 0x0002000d 0xc0000780 - 0x3801c005 0xac200780 0x1c00d001 0x0423c780 - 0x20108809 0x0000000b 0x04001001 0xe4200780 - 0x00020409 0xc0000780 0x04011001 0xe4204780 - 0x20088801 0x00000003 0x3801c1fd 0x6c2047c8 - 0x10000801 0x0403c500 0x0002000d 0xc0000780 - 0x3801c005 0xac200780 0x1c00d001 0x0423c780 - 0x200c8809 0x0000000b 0x04001001 0xe4200780 - 0x00020409 0xc0000780 0x04011001 0xe4204780 - 0x20048801 0x00000003 0x3801c1fd 0x6c2047c8 - 0x10000801 0x0403c500 0x0002000d 0xc0000780 - 0x3801c005 0xac200780 0x1c00d001 0x0423c780 - 0x200a8809 0x0000000b 0x04001001 0xe4200780 - 0x00020409 0xc0000780 0x04011001 0xe4204780 - 0x20028801 0x00000003 0x3801c1fd 0x6c2047c8 - 0x10000801 0x0403c500 0x0002000d 0xc0000780 - 0x3801c005 0xac200780 0x1c00d001 0x0423c780 - 0x04001001 0xe4200780 0x307c09fd 0x6c0147c8 - 0x04011001 0xe4204780 0x30000003 0x00000280 - 0x1000ce01 0x0423c780 0x40014e05 0x00200780 - 0xd0044005 0x20000780 0x30100209 0xc4100780 - 0x1400c205 0x0423c780 0x60004e09 0x00208780 - 0x3401c1fd 0x6c2107c8 0x10048011 0x00000003 - 0x10208001 0x00000003 0x30070405 0xc4100780 - 0x3006040d 0xc4100780 0x21000801 0x0440c280 - 0x20000205 0x0400c780 0x00000005 0xc0000780 - 0x2101e800 0x2502e004 0x20208001 0x00000003 - 0xd00e0005 0xa0c00781 + 0x00020805 0xc0000780 0xd4044805 0x20000780 + 0x308709fd 0x6c4107c8 0xa00d7003 0x00000000 + 0x1400c001 0x0423c780 0x100d7003 0x00000280 + 0x00020805 0xc0000780 0xd408480d 0x20000780 + 0xd4044809 0x20000780 0x1c00c001 0x0423c780 + 0x3800c1fd 0x6c2107c8 0x1c00c001 0x0423c780 + 0x20008805 0x0000000b 0x3800c001 0xac200780 + 0x10000805 0x0403c500 0x04001201 0xe4204780 + 0x04011201 0xe4200780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0x308809fd 0x6c4107c8 + 0xa00e6003 0x00000000 0x100e6003 0x00000280 + 0x00020805 0xc0000780 0xd4064809 0x20000780 + 0x20008805 0x00000007 0x3800c1fd 0x6c2047c8 + 0x10000805 0x0403c500 0x0002020d 0xc0000780 
+ 0x3800c001 0xac200780 0x1c00d205 0x0423c780 + 0x04001201 0xe4204780 0x04011201 0xe4200780 + 0xf0000001 0xe0000002 0x861ffe03 0x00000000 + 0x308909fd 0x6c4107c8 0x30000003 0x00000280 + 0x20298805 0x0000000b 0x00020209 0xc0000780 + 0x20208805 0x00000003 0x3800c1fd 0x6c2047c8 + 0x10000805 0x0403c500 0x0002020d 0xc0000780 + 0x00020805 0xc0000780 0x3800c005 0xac200780 + 0x1c00d201 0x0423c780 0x20198809 0x0000000b + 0x04001201 0xe4200780 0x00020409 0xc0000780 + 0x04011201 0xe4204780 0x20108801 0x00000003 + 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500 + 0x0002000d 0xc0000780 0x3801c005 0xac200780 + 0x1c00d201 0x0423c780 0x20118809 0x0000000b + 0x04001201 0xe4200780 0x00020409 0xc0000780 + 0x04011201 0xe4204780 0x20088801 0x00000003 + 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500 + 0x0002000d 0xc0000780 0x3801c005 0xac200780 + 0x1c00d201 0x0423c780 0x200d8809 0x0000000b + 0x04001201 0xe4200780 0x00020409 0xc0000780 + 0x04011201 0xe4204780 0x20048801 0x00000003 + 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500 + 0x0002000d 0xc0000780 0x3801c005 0xac200780 + 0x1c00d201 0x0423c780 0x200b8809 0x0000000b + 0x04001201 0xe4200780 0x00020409 0xc0000780 + 0x04011201 0xe4204780 0x20028801 0x00000003 + 0x3801c1fd 0x6c2047c8 0x10000801 0x0403c500 + 0x0002000d 0xc0000780 0x3801c005 0xac200780 + 0x1c00d201 0x0423c780 0x04001201 0xe4200780 + 0x307c09fd 0x6c0147c8 0x04011201 0xe4204780 + 0x30000003 0x00000280 0x1000d001 0x0423c780 + 0x40014e05 0x00200780 0xd0044805 0x20000780 + 0x30100209 0xc4100780 0x1400c205 0x0423c780 + 0x60004e09 0x00208780 0x3401c1fd 0x6c2107c8 + 0x10048011 0x00000003 0x10248001 0x00000003 + 0x30070405 0xc4100780 0x3006040d 0xc4100780 + 0x21000801 0x04428280 0x20000205 0x0400c780 + 0x00000005 0xc0000780 0x2101e800 0x2502e004 + 0x20208001 0x00000003 0xd00e0005 0xa0c00781 } } code { @@ -1867,7 +1796,7 @@ code { name = cudaEstimateResidual1 lmem = 0 smem = 2412 - reg = 9 + reg = 7 bar = 1 const { segname = const @@ -1875,113 +1804,90 @@ code { offset = 0 bytes = 32 mem { - 
0x000003ff 0x0000002f 0x0000001f 0x007fffff - 0x0000007f 0x0000003f 0x0000000e 0x00000040 + 0x000003ff 0x00000020 0x0000002f 0x0000001f + 0x007fffff 0x0000007f 0x0000003f 0x00000040 } } bincode { - 0xd0800205 0x00400780 0xa0000209 0x04000780 - 0xa000000d 0x04000780 0x30050401 0xc4100780 - 0x20000611 0x04000780 0x308109fd 0x644107c8 - 0xa0011003 0x00000000 0x30020815 0xc4100780 - 0x10011003 0x00000280 0xa0004e01 0x04200780 - 0x30070005 0xc4100780 0x30060001 0xc4100780 - 0x20008200 0x2100ec00 0x20000a01 0x04000780 - 0xd00e0001 0x80c00780 0x00000a05 0xc0000780 - 0x04045201 0xe4200780 0x307c09fd 0x6c0147ca - 0xa001f003 0x00000000 0x1001f003 0x00000280 - 0x1000d001 0x0423c780 0x40014c05 0x00200780 - 0x30100205 0xc4100780 0x60004c01 0x00204780 - 0x0004b201 0xe4200780 0xd012c809 0x20000780 - 0xd0118005 0x20000780 0x1100f000 0x1900e004 - 0x2100ee18 0x2541e000 0x30060001 0xac000780 - 0x0004b401 0xe4200780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0xd012d005 0x20000780 - 0x3404c1fd 0x6c20c7c8 0xa0030003 0x00000000 - 0x1002f003 0x00000280 0xd012c809 0x20000780 - 0xd0115005 0x20000780 0x1900e000 0x2500e000 - 0x20000801 0x04000780 0x30020001 0xc4100780 - 0x2000ca01 0x04200780 0xd00e0005 0x80c00780 - 0x1400d401 0x0423c780 0x30000201 0xec000780 - 0x10030003 0x00000780 0x1000f801 0x0403c780 - 0x00000a05 0xc0000782 0x30820805 0x6c40c7d0 - 0x04001201 0xe4200780 0xa00003fd 0x0c0147c8 - 0xa0048003 0x00000000 0x10048003 0x00001100 - 0xd012d005 0x20000780 0x2000d001 0x04210780 - 0x3400c1fd 0x6c20c7d8 0x00020005 0xc0000780 - 0xa0047003 0x00000000 0x10046003 0x00001280 - 0xd012c80d 0x20000780 0xd0115009 0x20000780 - 0x1d00e000 0x2104f004 0x2900e000 0x20008200 - 0x30020001 0xc4100780 0x2000ca01 0x04200780 - 0xd00e0005 0x80c00780 0x1800d401 0x0423c780 - 0x30000201 0xec000780 0x10047003 0x00000780 - 0x1000f801 0x0403c780 0x04001201 0xe4200782 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xd0114805 0x20000780 0x3500e001 0x00000003 - 0x307c01fd 0x6c0187d8 0x1000f819 0x0403c780 - 
0x1005e003 0x00001280 0xd0114809 0x20000780 - 0x20098805 0x00000003 0x3802c001 0xc4300780 - 0x00020205 0xc0000780 0x1024801d 0x00000003 - 0x2800c005 0x04210780 0x20248021 0x00000003 - 0x00000e09 0xc0000780 0xd8118009 0x20000780 - 0x1800c001 0x0423c780 0x20048e1d 0x00000003 - 0x6600c219 0x80218780 0x30080ffd 0x640147d8 - 0x10056003 0x00001280 0x10060003 0x00000780 - 0xd0114805 0x20000780 0x2400c005 0x04210780 - 0xd0115805 0x20000780 0x1400c001 0x0423c780 - 0x30000c01 0xec000780 0x00020205 0xc0000780 - 0x2440d201 0x04200780 0xd012d005 0x20000780 - 0x301f0019 0xec100780 0x3001001d 0xc4100780 - 0x3401c001 0x6c210780 0xd0070c05 0x04008780 - 0xa0000001 0x2c014780 0x30830205 0xac400780 - 0x40010001 0x00018780 0x00000a05 0xc0000780 - 0x04025201 0xe4200780 0x861ffe03 0x00000000 - 0x308409fd 0x6c4107d8 0x00000a05 0xc0001500 - 0xd40d480d 0x20001500 0xd4094809 0x20001500 - 0x1c00c001 0x0423d500 0x2800c001 0x04201500 - 0x04025201 0xe4201500 0x861ffe03 0x00000000 - 0x308509fd 0x6c4107d8 0x00000a05 0xc0001500 - 0xd40b480d 0x20001500 0xd4094809 0x20001500 - 0x1c00c001 0x0423d500 0x2800c001 0x04201500 - 0x04025201 0xe4201500 0x861ffe03 0x00000000 - 0x00000a05 0xc0000680 0xd40a480d 0x20000680 - 0xd4094809 0x20000680 0x1c00c001 0x0423c680 - 0x2800c001 0x04200680 0x04025201 0xe4200680 - 0x861ffe03 0x00000000 0xa0096003 0x00000000 - 0x10096003 0x00000100 0x00000a05 0xc0000780 - 0xd4094809 0x20000780 0x1800e001 0x0423c780 - 0x2800c001 0x04200780 0x04025201 0xe4200780 - 0x1900f000 0x2900e000 0x04025201 0xe4200780 - 0x1900e800 0x2900e000 0x04025201 0xe4200780 - 0x1900e400 0x2900e000 0x04025201 0xe4200780 - 0x1900e200 0x2900e000 0x04025201 0xe4200780 - 0x307c05fd 0x640147ca 0x30000003 0x00000280 + 0xd0800209 0x00400780 0xa0000001 0x04000780 + 0x60200401 0x00000003 0x308201fd 0x644107c8 + 0xa000f003 0x00000000 0x3002000d 0xc4100780 + 0x1000f003 0x00000280 0xa0004e05 0x04200780 + 0x30070209 0xc4100780 0x30060205 0xc4100780 + 0x20018404 0x2101ec04 0x20000605 0x04004780 + 0xd00e0205 0x80c00780 
0x00000605 0xc0000780 + 0x04045201 0xe4204780 0x307c0005 0x6c0087d2 + 0xa00003fd 0x0c0147c8 0xa001e003 0x00000000 + 0x1001e003 0x00001100 0x1000d005 0x0423c780 + 0x40034c09 0x00200780 0x30100409 0xc4100780 + 0x60024c05 0x00208780 0x0004b201 0xe4204780 0xd012c809 0x20000780 0xd0114805 0x20000780 - 0x1900e000 0x2540ee00 0x347ccbfd 0x6c2147c8 - 0x3400c005 0x04200780 0x100a1003 0x00000280 - 0xd0094805 0x20000780 0x347cc1fd 0x6c2087c8 - 0x100a3003 0x00000280 0x10018001 0x00000003 - 0x100a4003 0x00000780 0x1000f801 0x0403c780 - 0x3001d009 0xac200780 0x20018605 0x00000003 - 0x307c0409 0x8c000780 0x40030811 0x00000780 - 0x60020a11 0x00010780 0x30860619 0x64410780 - 0xd0094805 0x20000780 0x30100815 0xc4100780 - 0xa0000c19 0x2c014780 0x30010411 0xec100780 - 0x60020809 0x00014780 0x407f8c15 0x0007ffff - 0x2544e004 0x20058408 0x30030205 0xec000780 - 0x20000205 0x04008780 0x40030009 0x00000780 - 0x60020209 0x00008780 0x30100409 0xc4100780 - 0x60020001 0x00008780 0x00020605 0xc0000780 - 0x04025201 0xe4200780 0xd4094809 0x20000780 - 0x1900e800 0x1900f804 0x3800c001 0xac200780 - 0x3801d005 0xac200780 0x30010001 0xac000780 - 0x307c07fd 0x640147c8 0x04025201 0xe4200780 - 0x30000003 0x00000280 0xd4094805 0x20000780 - 0xa0004c01 0x04200780 0x1500e204 0x1500e608 - 0x61002e01 0x00000007 0x3401c005 0xac200780 - 0x3402c409 0xac200780 0x30020001 0xc4100780 - 0x30020205 0xac000780 0x2000c801 0x04200780 - 0xd00e0005 0xa0c00781 + 0x1100f004 0x1900e008 0x2501e010 0x2542ee04 + 0x30040205 0xac000780 0x0004b401 0xe4204780 + 0xf0000001 0xe0000002 0x861ffe03 0x00000000 + 0xd012d005 0x20000780 0x3400c1fd 0x6c20c7d8 + 0xa002f003 0x00000000 0x1002e003 0x00001280 + 0xd012c809 0x20000780 0xd0115005 0x20000780 + 0x1900e004 0x2501e004 0x20000005 0x04004780 + 0x30020205 0xc4100780 0x2000ca05 0x04204780 + 0xd00e0209 0x80c00780 0x1400d405 0x0423c780 + 0x30010405 0xec000780 0x1002f003 0x00000780 + 0x1000f805 0x0403c780 0x00000605 0xc0000782 + 0x30830009 0x6c40c7e0 0x04001201 0xe4204780 + 0xa00005fd 0x0c0147d8 
0xa0047003 0x00000000 + 0x10047003 0x00002100 0xd012d005 0x20000780 + 0x2000d005 0x04200780 0x3401c1fd 0x6c20c7e8 + 0x00020205 0xc0000780 0xa0046003 0x00000000 + 0x10045003 0x00002280 0xd012c80d 0x20000780 + 0xd0115009 0x20000780 0x1d00e004 0x2100f008 + 0x2901e004 0x20018404 0x30020205 0xc4100780 + 0x2000ca05 0x04204780 0xd00e0209 0x80c00780 + 0x1800d405 0x0423c780 0x30010405 0xec000780 + 0x10046003 0x00000780 0x1000f805 0x0403c780 + 0x04001201 0xe4204782 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0xd0114805 0x20000780 + 0x3500e005 0x00000003 0x307c03fd 0x6c0187e8 + 0x1000f811 0x0403c780 0x1005d003 0x00002280 + 0xd0114809 0x20000780 0x20098009 0x00000003 + 0x3802c005 0xc4300780 0x00020405 0xc0000780 + 0x10248015 0x00000003 0x2800c009 0x04200780 + 0x20248219 0x00000003 0x00000a09 0xc0000780 + 0xd8118009 0x20000780 0x1800c005 0x0423c780 + 0x20048a15 0x00000003 0x6601c211 0x80210780 + 0x30060bfd 0x640147e8 0x10055003 0x00002280 + 0x1005f003 0x00000780 0xd0114805 0x20000780 + 0x2400c009 0x04200780 0xd0115805 0x20000780 + 0x1400c005 0x0423c780 0x30010805 0xec000780 + 0x00020405 0xc0000780 0x2440d205 0x04204780 + 0xd012d005 0x20000780 0x301f0211 0xec100780 + 0x30010215 0xc4100780 0x3402c005 0x6c210780 + 0xd0050809 0x04008780 0xa0000205 0x2c014780 + 0x30840409 0xac400780 0x40020205 0x00018780 + 0x00000605 0xc0000780 0x04025201 0xe4204780 + 0x861ffe03 0x00000000 0x308501fd 0x6c4107e8 + 0x00000605 0xc0002500 0xd40d480d 0x20002500 + 0xd4094809 0x20002500 0x1c00c005 0x0423e500 + 0x2800c005 0x04206500 0x04025201 0xe4206500 + 0x861ffe03 0x00000000 0x308601fd 0x6c4107e8 + 0x00000605 0xc0002500 0xd40b480d 0x20002500 + 0xd4094809 0x20002500 0x1c00c001 0x0423e500 + 0x2800c001 0x04202500 0x04025201 0xe4202500 + 0x861ffe03 0x00000000 0x00000605 0xc0001680 + 0xd40a480d 0x20001680 0xd4094809 0x20001680 + 0x1c00c001 0x0423d680 0x2800c001 0x04201680 + 0x04025201 0xe4201680 0x861ffe03 0x00000000 + 0xa0095003 0x00000000 0x10095003 0x00001100 + 0x00000605 0xc0000780 0xd4094809 0x20000780 
+ 0x1800e001 0x0423c780 0x2800c001 0x04200780 + 0x04025201 0xe4200780 0x1900f000 0x2900e000 + 0x04025201 0xe4200780 0x1900e800 0x2900e000 + 0x04025201 0xe4200780 0x1900e400 0x2900e000 + 0x04025201 0xe4200780 0x1900e200 0x2900e000 + 0x04025201 0xe4200780 0xf0000001 0xe0000002 + 0x30000003 0x00000100 0xa0004c01 0x04200780 + 0x61002e01 0x00000007 0xd0094805 0x20000780 + 0x30020005 0xc4100780 0x1500e000 0x2101e804 + 0xd00e0201 0xa0c00781 } } code {