From f3d0c20c6eb0966c32cd93dfacba75b863fd4a7b Mon Sep 17 00:00:00 2001 From: chudov Date: Thu, 8 Oct 2009 17:22:16 +0000 Subject: [PATCH] optimizations --- CUETools.FlaCuda/FlaCudaWriter.cs | 27 +- CUETools.FlaCuda/flacuda.cu | 232 ++++-------- CUETools.FlaCuda/flacuda.cubin | 606 ++++++++++++------------------ 3 files changed, 341 insertions(+), 524 deletions(-) diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs index 0d1d568..aed5909 100644 --- a/CUETools.FlaCuda/FlaCudaWriter.cs +++ b/CUETools.FlaCuda/FlaCudaWriter.cs @@ -1161,10 +1161,11 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(task.cudaComputeLPC, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel); cuda.SetParameter(task.cudaComputeLPC, 2 * sizeof(uint), (uint)task.cudaAutocorOutput.Pointer); cuda.SetParameter(task.cudaComputeLPC, 3 * sizeof(uint), (uint)eparams.max_prediction_order); - cuda.SetParameter(task.cudaComputeLPC, 4 * sizeof(uint), (uint)task.nTasksPerWindow); - cuda.SetParameter(task.cudaComputeLPC, 5 * sizeof(uint), (uint)autocorPartCount); - cuda.SetParameterSize(task.cudaComputeLPC, 6U * sizeof(uint)); - cuda.SetFunctionBlockShape(task.cudaComputeLPC, 32, 8, 1); + cuda.SetParameter(task.cudaComputeLPC, 4 * sizeof(uint), (uint)task.cudaLPCData.Pointer); + cuda.SetParameter(task.cudaComputeLPC, 5 * sizeof(uint), (uint)_windowcount); + cuda.SetParameter(task.cudaComputeLPC, 6 * sizeof(uint), (uint)autocorPartCount); + cuda.SetParameterSize(task.cudaComputeLPC, 7U * sizeof(uint)); + cuda.SetFunctionBlockShape(task.cudaComputeLPC, 32, 1, 1); cuda.SetParameter(task.cudaComputeLPCLattice, 0, (uint)task.cudaResidualTasks.Pointer); cuda.SetParameter(task.cudaComputeLPCLattice, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel); @@ -1174,6 +1175,15 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameterSize(task.cudaComputeLPCLattice, 5U * sizeof(uint)); cuda.SetFunctionBlockShape(task.cudaComputeLPCLattice, 256, 1, 1); + cuda.SetParameter(task.cudaQuantizeLPC, 0, (uint)task.cudaResidualTasks.Pointer); + cuda.SetParameter(task.cudaQuantizeLPC, 1 * sizeof(uint), (uint)task.nResidualTasksPerChannel); + cuda.SetParameter(task.cudaQuantizeLPC, 2 * sizeof(uint), (uint)task.nTasksPerWindow); + cuda.SetParameter(task.cudaQuantizeLPC, 3 * sizeof(uint), (uint)_windowcount); + cuda.SetParameter(task.cudaQuantizeLPC, 4 * sizeof(uint), (uint)task.cudaLPCData.Pointer); + cuda.SetParameter(task.cudaQuantizeLPC, 5 * sizeof(uint), (uint)eparams.max_prediction_order); + cuda.SetParameterSize(task.cudaQuantizeLPC, 6U * sizeof(uint)); + cuda.SetFunctionBlockShape(task.cudaQuantizeLPC, 32, 8, 1); + cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 0, (uint)task.cudaResidualOutput.Pointer); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 1, (uint)task.cudaSamples.Pointer); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 2, (uint)task.cudaResidualTasks.Pointer); @@ -1246,6 +1256,7 @@ namespace CUETools.Codecs.FlaCuda cuda.LaunchAsync(task.cudaFindWastedBits, channelsCount * task.frameCount, 1, task.stream); cuda.LaunchAsync(task.cudaComputeAutocor, autocorPartCount, task.nAutocorTasksPerChannel * channelsCount * task.frameCount, task.stream); cuda.LaunchAsync(task.cudaComputeLPC, task.nAutocorTasksPerChannel, channelsCount * task.frameCount, task.stream); + cuda.LaunchAsync(task.cudaQuantizeLPC, 1, channelsCount * task.frameCount, task.stream); } cuda.LaunchAsync(cudaEstimateResidual, residualPartCount, task.nResidualTasksPerChannel * channelsCount * task.frameCount / threads_y, task.stream); cuda.LaunchAsync(task.cudaChooseBestMethod, 1, channelsCount * task.frameCount, task.stream); @@ -1963,7 +1974,8 @@ namespace CUETools.Codecs.FlaCuda public CUfunction cudaFindWastedBits; public CUfunction cudaComputeAutocor; public CUfunction cudaComputeLPC; - public CUfunction cudaComputeLPCLattice; + public CUfunction cudaComputeLPCLattice; + public CUfunction cudaQuantizeLPC; public CUfunction cudaEstimateResidual; public CUfunction cudaEstimateResidual8; public CUfunction cudaEstimateResidual12; @@ -1979,6 +1991,7 @@ namespace CUETools.Codecs.FlaCuda public CUfunction cudaFindPartitionOrder; public CUdeviceptr cudaSamplesBytes; public CUdeviceptr cudaSamples; + public CUdeviceptr cudaLPCData; public CUdeviceptr cudaResidual; public CUdeviceptr cudaPartitions; public CUdeviceptr cudaRiceParams; @@ -2016,10 +2029,12 @@ namespace CUETools.Codecs.FlaCuda samplesBufferLen = sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelCount; int partitionsLen = sizeof(int) * (30 << 8) * channelCount * FlaCudaWriter.maxFrames; int riceParamsLen = sizeof(int) * (4 << 8) * channelCount * FlaCudaWriter.maxFrames; + int lpcDataLen = sizeof(float) * 32 * 33 * lpc.MAX_LPC_WINDOWS * channelCount * FlaCudaWriter.maxFrames; cudaSamplesBytes = cuda.Allocate((uint)samplesBufferLen / 2); cudaSamples = cuda.Allocate((uint)samplesBufferLen); cudaResidual = cuda.Allocate((uint)samplesBufferLen); + cudaLPCData = cuda.Allocate((uint)lpcDataLen); cudaPartitions = cuda.Allocate((uint)partitionsLen); cudaRiceParams = cuda.Allocate((uint)riceParamsLen); cudaBestRiceParams = cuda.Allocate((uint)riceParamsLen / 4); @@ -2057,6 +2072,7 @@ namespace CUETools.Codecs.FlaCuda cudaChannelDecorr2 = cuda.GetModuleFunction("cudaChannelDecorr2"); cudaFindWastedBits = cuda.GetModuleFunction("cudaFindWastedBits"); cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC"); + cudaQuantizeLPC = cuda.GetModuleFunction("cudaQuantizeLPC"); cudaComputeLPCLattice = cuda.GetModuleFunction("cudaComputeLPCLattice"); cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual"); cudaEstimateResidual8 = cuda.GetModuleFunction("cudaEstimateResidual8"); @@ -2081,6 +2097,7 @@ namespace CUETools.Codecs.FlaCuda { cuda.Free(cudaSamples); cuda.Free(cudaSamplesBytes); + cuda.Free(cudaLPCData); cuda.Free(cudaResidual); cuda.Free(cudaPartitions); cuda.Free(cudaAutocorOutput); diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu index 3d38431..2efe950 100644 --- a/CUETools.FlaCuda/flacuda.cu +++ b/CUETools.FlaCuda/flacuda.cu @@ -224,175 +224,95 @@ extern "C" __global__ void cudaComputeLPC( int taskCount, // tasks per block float*autoc, int max_order, // should be <= 32 - int taskCount2, // tasks per window function, should be <= max_order - int partCount // should be <= blockDim? + float *lpcs, + int windowCount, + int partCount ) { __shared__ struct { FlaCudaSubframeData task; - union - { - volatile float parts[256]; - volatile int tmpi[256]; - }; - volatile float lpc[33*16]; + volatile float parts[32]; volatile float ldr[32]; volatile float gen1[32]; + volatile float error[32]; volatile float autoc[33]; - volatile float error[64]; - volatile float order[64]; - //volatile float reff[32]; - //int cbits; + volatile int lpcOffs; + volatile int autocOffs; } shared; - const int tid = threadIdx.x + threadIdx.y * 32; + const int tid = threadIdx.x;// + threadIdx.y * 32; // fetch task data if (tid < sizeof(shared.task) / sizeof(int)) ((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y * taskCount))[tid]; - __syncthreads(); + if (tid == 0) + { + shared.lpcOffs = (blockIdx.x + blockIdx.y * windowCount) * (max_order + 1) * 32; + shared.autocOffs = (blockIdx.x + blockIdx.y * windowCount) * (max_order + 1) * partCount; + } + //__syncthreads(); // add up autocorrelation parts - for (int order = threadIdx.y; order <= max_order; order += 8) + + // for (int order = threadIdx.x; order <= max_order; order += 32) + // { + //float sum = 0.0f; + //for (int pos = 0; pos < partCount; pos++) + // sum += autoc[shared.autocOffs + pos * (max_order + 1) + order]; + //shared.autoc[order] = sum; + // } + + for (int order = 0; order <= max_order; order ++) { shared.parts[tid] = 0.0f; for (int pos = threadIdx.x; pos < partCount; pos += 32) - shared.parts[tid] += autoc[((blockIdx.y * gridDim.x + blockIdx.x) * partCount + pos) * (max_order + 1) + order]; + shared.parts[tid] += autoc[shared.autocOffs + pos * (max_order + 1) + order]; shared.parts[tid] = shared.parts[tid] + shared.parts[tid + 8] + shared.parts[tid + 16] + shared.parts[tid + 24]; shared.parts[tid] = shared.parts[tid] + shared.parts[tid + 2] + shared.parts[tid + 4] + shared.parts[tid + 6]; if (threadIdx.x == 0) shared.autoc[order] = shared.parts[tid] + shared.parts[tid + 1]; } - __syncthreads(); + //__syncthreads(); // Compute LPC using Schur and Levinson-Durbin recursion if (threadIdx.y == 0) { - float gen0 = shared.gen1[tid] = shared.autoc[tid+1]; - shared.ldr[tid] = 0.0f; + float gen0 = shared.gen1[threadIdx.x] = shared.autoc[threadIdx.x+1]; + shared.ldr[threadIdx.x] = 0.0f; float error = shared.autoc[0]; for (int order = 0; order < max_order; order++) { // Schur recursion float reff = -shared.gen1[0] / error; - //if (tid == 0) shared.reff[order] = reff; - error += shared.gen1[0] * reff; - //error *= (1 - reff * reff); - if (tid < max_order - 1 - order) + error += shared.gen1[0] * reff; // Equivalent to error *= (1 - reff * reff); + + if (threadIdx.x < max_order - 1 - order) { - float gen1 = shared.gen1[tid + 1] + reff * gen0; - gen0 += shared.gen1[tid + 1] * reff; - shared.gen1[tid] = gen1; + float gen1 = shared.gen1[threadIdx.x + 1] + reff * gen0; + gen0 += shared.gen1[threadIdx.x + 1] * reff; + shared.gen1[threadIdx.x] = gen1; } + + // Store prediction error + if (threadIdx.x == 0) + shared.error[order] = error; + // Levinson-Durbin recursion - shared.ldr[tid] += (tid < order) * reff * shared.ldr[order - 1 - tid] + (tid == order) * reff; - shared.lpc[((order * (order + 1)) >> 1) + tid] = -shared.ldr[tid]; - shared.error[order] = error; + shared.ldr[threadIdx.x] += (threadIdx.x < order) * reff * shared.ldr[order - 1 - threadIdx.x] + (threadIdx.x == order) * reff; + + // Output coeffs + if (threadIdx.x <= order) + lpcs[shared.lpcOffs + order * 32 + threadIdx.x] = -shared.ldr[order - threadIdx.x]; } - shared.order[tid] = tid < max_order ? tid : max_order - 1; - shared.order[tid + 32] = 0; - if (taskCount2 < max_order) - { - // Select best orders based on something similar to Schwartz's Criterion - shared.error[tid] = tid < max_order ? __logf(shared.error[tid]) + (tid * 0.01f) : __logf(shared.error[0]) + 1; - shared.error[tid + 32] = __logf(shared.error[0]) + 1; - - for(int size = 2; size < 32; size <<= 1){ - //Bitonic merge - int ddd = (threadIdx.x & (size / 2)) == 0; - for(int stride = size / 2; stride > 0; stride >>= 1){ - int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1)); - if ((shared.error[pos] >= shared.error[pos + stride]) == ddd) - { - float t = shared.error[pos]; - shared.error[pos] = shared.error[pos + stride]; - shared.error[pos + stride] = t; - int t1 = shared.order[pos]; - shared.order[pos] = shared.order[pos + stride]; - shared.order[pos + stride] = t1; - } - } - } - - //ddd == dir for the last bitonic merge step - { - for(int stride = 16; stride > 0; stride >>= 1){ - int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1)); - if (shared.error[pos] >= shared.error[pos + stride]) - { - float t = shared.error[pos]; - shared.error[pos] = shared.error[pos + stride]; - shared.error[pos + stride] = t; - int t1 = shared.order[pos]; - shared.order[pos] = shared.order[pos + stride]; - shared.order[pos + stride] = t1; - } - } - } - - // float l1 = shared.error[tid]; - // #pragma unroll 0 - // for (int sh = 4; sh >= 0; sh --) - // { - //float l2 = shared.error[threadIdx.x + (1 << sh)]; - //shared.order[threadIdx.x] = shared.order[threadIdx.x + ((l2 < l1) << sh)]; - //shared.error[threadIdx.x] = l1 = min(l1, l2); - // } - } - } - __syncthreads(); - - // Quantization - for (int i = threadIdx.y; i < taskCount2; i += 8) - //for (int precision = 0; precision < 1; precision++)//precisions; precision++) - { - int order = shared.order[i]; - float lpc = threadIdx.x <= order ? shared.lpc[((order * (order + 1)) >> 1) + order - threadIdx.x] : 0.0f; - // get 15 bits of each coeff - int coef = __float2int_rn(lpc * (1 << 15)); - // remove sign bits - shared.tmpi[tid] = coef ^ (coef >> 31); - // OR reduction - shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24]; - shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6]; - //SUM32(shared.tmpi,tid,|=); - // choose precision - //int cbits = max(3, min(10, 5 + (shared.task.abits >> 1))); // - __float2int_rn(shared.PE[order - 1]) - int cbits = max(3, min(min(13 - (shared.task.blocksize <= 2304) - (shared.task.blocksize <= 1152) - (shared.task.blocksize <= 576), shared.task.abits), __clz(order) + 1 - shared.task.abits)); - // calculate shift based on precision and number of leading zeroes in coeffs - int shift = max(0,min(15, __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]) - 18 + cbits)); - //if (shared.task.abits + 32 - __clz(order) < shift - //int shift = max(0,min(15, (shared.task.abits >> 2) - 14 + __clz(shared.tmpi[threadIdx.x & ~31]) + ((32 - __clz(order))>>1))); - // quantize coeffs with given shift - coef = max(-(1 << (cbits - 1)), min((1 << (cbits - 1)) -1, __float2int_rn(lpc * (1 << shift)))); - // error correction - //shared.tmp[threadIdx.x] = (threadIdx.x != 0) * (shared.arp[threadIdx.x - 1]*(1 << shared.task.shift) - shared.task.coefs[threadIdx.x - 1]); - //shared.task.coefs[threadIdx.x] = max(-(1 << (shared.task.cbits - 1)), min((1 << (shared.task.cbits - 1))-1, __float2int_rn((shared.arp[threadIdx.x]) * (1 << shared.task.shift) + shared.tmp[threadIdx.x]))); - // remove sign bits - shared.tmpi[tid] = coef ^ (coef >> 31); - // OR reduction - shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 8] | shared.tmpi[tid + 16] | shared.tmpi[tid + 24]; - shared.tmpi[tid] = shared.tmpi[tid] | shared.tmpi[tid + 2] | shared.tmpi[tid + 4] | shared.tmpi[tid + 6]; - //SUM32(shared.tmpi,tid,|=); - // calculate actual number of bits (+1 for sign) - cbits = 1 + 32 - __clz(shared.tmpi[threadIdx.y * 32] | shared.tmpi[threadIdx.y * 32 + 1]); - - // output shift, cbits and output coeffs - int taskNo = blockIdx.y * taskCount + blockIdx.x * taskCount2 + i; - if (threadIdx.x == 0) - tasks[taskNo].data.shift = shift; - if (threadIdx.x == 0) - tasks[taskNo].data.cbits = cbits; - if (threadIdx.x == 0) - tasks[taskNo].data.residualOrder = order + 1; - if (threadIdx.x <= order) - tasks[taskNo].coefs[threadIdx.x] = coef; + // Output prediction error estimates + if (threadIdx.x < max_order) + lpcs[shared.lpcOffs + max_order * 32 + threadIdx.x] = shared.error[threadIdx.x]; } } extern "C" __global__ void cudaQuantizeLPC( FlaCudaSubframeTask *tasks, int taskCount, // tasks per block - int taskCountLPC, // LPC tasks per block + int taskCountLPC, // tasks per set of coeffs int windowCount, // sets of coeffs per block float*lpcs, int max_order // should be <= 32 @@ -401,7 +321,9 @@ extern "C" __global__ void cudaQuantizeLPC( __shared__ struct { FlaCudaSubframeData task; volatile int tmpi[256]; - volatile int order[256]; + volatile int order[128]; + volatile int offset[128]; + volatile int index[256]; volatile float error[256]; } shared; const int tid = threadIdx.x + threadIdx.y * 32; @@ -411,34 +333,34 @@ extern "C" __global__ void cudaQuantizeLPC( ((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y * taskCount))[tid]; __syncthreads(); - shared.order[tid] = min(max_order - 1, threadIdx.x) + min(threadIdx.y, windowCount - 1) * 32; - shared.error[tid] = 10000.0f + shared.order[tid]; + shared.index[tid] = min(max_order - 1, threadIdx.x) + min(threadIdx.y >> 1, windowCount - 1) * 32; + shared.error[tid] = 10000.0f + shared.index[tid]; + // Select best orders based on Akaike's Criteria + if ((threadIdx.y & 1) == 0 && (threadIdx.y >> 1) < windowCount) { - int lpcs_offs = (threadIdx.y + blockIdx.y * windowCount) * (max_order + 1) * 32; - - // Select best orders based on Akaike's Criteria - // Load prediction error estimates - if (threadIdx.y < windowCount && threadIdx.x < max_order) + if (threadIdx.x < max_order) + { + int lpcs_offs = ((threadIdx.y >> 1) + blockIdx.y * windowCount) * (max_order + 1) * 32; shared.error[tid] = __logf(lpcs[lpcs_offs + max_order * 32 + threadIdx.x]) + (threadIdx.x * 0.01f); - __syncthreads(); + } // Sort using bitonic sort for(int size = 2; size < 64; size <<= 1){ //Bitonic merge int ddd = (tid & (size / 2)) == 0; for(int stride = size / 2; stride > 0; stride >>= 1){ - __syncthreads(); - int pos = 2 * tid - (tid & (stride - 1)); + //__syncthreads(); + int pos = threadIdx.y * 32 + 2 * threadIdx.x - (threadIdx.x & (stride - 1)); if ((shared.error[pos] >= shared.error[pos + stride]) == ddd) { float t = shared.error[pos]; shared.error[pos] = shared.error[pos + stride]; shared.error[pos + stride] = t; - int t1 = shared.order[pos]; - shared.order[pos] = shared.order[pos + stride]; - shared.order[pos + stride] = t1; + int t1 = shared.index[pos]; + shared.index[pos] = shared.index[pos + stride]; + shared.index[pos + stride] = t1; } } } @@ -446,30 +368,34 @@ extern "C" __global__ void cudaQuantizeLPC( //ddd == dir for the last bitonic merge step { for(int stride = 32; stride > 0; stride >>= 1){ - __syncthreads(); - int pos = 2 * tid - (tid & (stride - 1)); + //__syncthreads(); + int pos = threadIdx.y * 32 + 2 * threadIdx.x - (threadIdx.x & (stride - 1)); if (shared.error[pos] >= shared.error[pos + stride]) { float t = shared.error[pos]; shared.error[pos] = shared.error[pos + stride]; shared.error[pos + stride] = t; - int t1 = shared.order[pos]; - shared.order[pos] = shared.order[pos + stride]; - shared.order[pos + stride] = t1; + int t1 = shared.index[pos]; + shared.index[pos] = shared.index[pos + stride]; + shared.index[pos + stride] = t1; } } } - } - __syncthreads(); + if (threadIdx.x < taskCountLPC) + { + shared.order[(threadIdx.y >> 1) * taskCountLPC + threadIdx.x] = shared.index[tid] & 31; + shared.offset[(threadIdx.y >> 1) * taskCountLPC + threadIdx.x] = (shared.index[tid] >> 5) + blockIdx.y * windowCount; + } + } + __syncthreads(); // Quantization - for (int i = threadIdx.y; i < taskCountLPC; i += 8) + for (int i = threadIdx.y; i < taskCountLPC * windowCount; i += 8) //for (int precision = 0; precision < 1; precision++)//precisions; precision++) { - int order = shared.order[i] & 31; - int lpcs_offs = ((shared.order[i] >> 5) + blockIdx.y * windowCount) * (max_order + 1) * 32; - float lpc = threadIdx.x <= order ? lpcs[lpcs_offs + order * 32 + order - threadIdx.x] : 0.0f; + int order = shared.order[i]; + float lpc = threadIdx.x <= order ? lpcs[(shared.offset[i] * (max_order + 1) + order) * 32 + threadIdx.x] : 0.0f; // get 15 bits of each coeff int coef = __float2int_rn(lpc * (1 << 15)); // remove sign bits diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin index 5032fae..bbb59ac 100644 --- a/CUETools.FlaCuda/flacuda.cubin +++ b/CUETools.FlaCuda/flacuda.cubin @@ -292,8 +292,8 @@ code { code { name = cudaQuantizeLPC lmem = 0 - smem = 3176 - reg = 13 + smem = 4200 + reg = 11 bar = 1 const { segname = const @@ -301,7 +301,7 @@ code { offset = 0 bytes = 56 mem { - 0x000003ff 0x0000000f 0x3c23d70a 0x00000001 + 0x000003ff 0x0000000f 0x00000001 0x3c23d70a 0xffffffff 0x0000003f 0x0000001f 0x00000900 0x0000000c 0x00000480 0x00000240 0x00000003 0x00000020 0x0000009e @@ -310,158 +310,165 @@ code { bincode { 0xd0800205 0x00400780 0xa0000205 0x04000780 0xa0000015 0x04000780 0x30050201 0xc4100780 - 0x20000a11 0x04000780 0x200a8801 0x00000003 - 0x00020009 0xc0000780 0x308109fd 0x644107c8 - 0xa0015003 0x00000000 0x30020809 0xc4100780 + 0x20000a09 0x04000780 0x200a8401 0x00000003 + 0x00020009 0xc0000780 0x308105fd 0x644107c8 + 0xa0015003 0x00000000 0x3002040d 0xc4100780 0x10015003 0x00000280 0x1000ca01 0x0423c780 - 0x40014e0d 0x00200780 0x3010060d 0xc4100780 - 0x60004e01 0x0020c780 0x3007000d 0xc4100780 - 0x30060001 0xc4100780 0x20008600 0x2100e800 - 0x20000401 0x04000780 0xd00e0001 0x80c00780 + 0x40014e11 0x00200780 0x30100811 0xc4100780 + 0x60004e01 0x00210780 0x30070011 0xc4100780 + 0x30060001 0xc4100780 0x20008800 0x2100e800 + 0x20000601 0x04000780 0xd00e0001 0x80c00780 0x08000001 0xe4200780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x213fee09 0x0fffffff - 0x213ff201 0x0fffffff 0x30020209 0xa4000780 - 0x30000a01 0xa4000780 0x30050409 0xc4100780 - 0x20000001 0x04008780 0x08022001 0xe4200780 - 0xd8088005 0x20000780 0xa400c001 0x44214780 - 0x3001cffd 0x642107c8 0xb0000001 0x0461c403 - 0x3005d3fd 0x642102c8 0x08042001 0xe4200780 - 0xa003b003 0x00000000 0x1003b003 0x00000100 - 0x1000ce01 0x0423c780 0x40014e09 0x00200780 - 0x30100409 0xc4100780 0x60004e09 0x00208780 - 0x2101f201 0x00000003 0x20000409 0x04004780 - 0x4005000d 0x00000780 0x6004020d 0x0000c780 - 0x3010060d 0xc4100780 0x60040009 0x0000c780 - 0x3005d201 0xc4300780 0x30050409 0xc4100780 - 0x20028000 0x20008a00 0x30020001 0xc4100780 - 0x2000d001 0x04200780 0xd00e0001 0x80c00780 - 0x90000001 0x60000780 0xa0000a09 0x44004780 - 0xc0180001 0x03f31723 0xe00a0401 0x03c23d73 - 0x08042001 0xe4200780 0x30010819 0xc4100782 - 0x1002801d 0x00000003 0x301f0e01 0xec100780 - 0xd0830001 0x04400780 0x20000001 0x0401c780 - 0x30010001 0xec100780 0x307c01fd 0x6c00c7c8 - 0x10064003 0x00000280 0xd0040009 0x04000780 - 0x307c05fd 0x6c0087c8 0x861ffe03 0x00000000 - 0x203f8009 0x0fffffff 0xd0040409 0x04000780 - 0x20428c0c 0x20008608 0x00020405 0xc0000780 - 0xd410d00d 0x20000780 0x1c00c009 0x0423c780 - 0x0002060d 0xc0000780 0xdc10d011 0x20000780 - 0xb002c00d 0x60218784 0x10000011 0x00000003 - 0xa0000615 0x08014780 0x10001011 0x20408280 - 0xd00509fd 0x000087d8 0xa0061003 0x00000000 - 0x10061003 0x00001280 0xd410d011 0x20000780 - 0x1000c00d 0x0423c784 0xdc10d011 0x20000780 - 0x1000c009 0x0423c784 0x0c043401 0xe420c780 - 0x04043401 0xe4208780 0xd408d011 0x20000780 - 0x1000c00d 0x0423c784 0xdc08d011 0x20000780 - 0x1000c009 0x0423c784 0x0c023401 0xe420c780 - 0x04023401 0xe4208780 0x30010001 0xec100782 - 0x307c01fd 0x6c0107d8 0x10045003 0x00001280 - 0x30010e1d 0xc4100780 0x30850ffd 0x6c40c7c8 - 0x1003d003 0x00000280 0x1020800d 0x00000003 - 0x861ffe03 0x00000000 0x203f8601 0x0fffffff - 0xd0040001 0x04000780 0x20408c08 0x20038400 - 0x00020005 0xc0000780 0xd410d00d 0x20000780 - 0x1c00c001 0x0423c780 0x0002040d 0xc0000780 - 0xdc10d011 0x20000780 0xb000c1fd 0x602187cc - 0xa0080003 0x00000000 0x10080003 0x00000100 - 0xd410d011 0x20000780 0x1000c009 0x0423c784 - 0xdc10d011 0x20000780 0x1000c001 0x0423c784 - 0x0c043401 0xe4208780 0x04043401 0xe4200780 - 0xd408d011 0x20000780 0x1000c009 0x0423c784 - 0xdc08d011 0x20000780 0x1000c001 0x0423c784 - 0x0c023401 0xe4208780 0x04023401 0xe4200780 - 0x3001060d 0xec100782 0x307c07fd 0x6c0107c8 - 0x10068003 0x00000280 0x861ffe03 0x00000000 - 0x3001cdfd 0x6c20c7c8 0x10000211 0x0403c780 - 0x30000003 0x00000280 0xa0004e09 0x04200780 - 0x1000ce01 0x0423c780 0x4004020d 0x00000780 - 0x200a8219 0x00000003 0x3010060d 0xc4100780 - 0x00020c0d 0xc0000780 0x60040019 0x0000c780 - 0x307c0bfd 0x640087c8 0x2101f20d 0x00000003 - 0xdc088005 0x20000780 0x3405c001 0xec300780 - 0x20000c01 0x04000780 0x40010c1d 0x00000780 - 0x60000e21 0x0001c780 0xd486c01d 0x04600780 - 0x30101021 0xc4100780 0x30050e25 0x640187e0 - 0x60000c01 0x00020780 0xa00013fd 0x0c0147d8 - 0xa00a5003 0x00000000 0x30050001 0xc4100780 - 0x100a4003 0x00002100 0x30050e21 0xc4100780 - 0x20088000 0x20008e00 0x20400001 0x04014780 + 0x861ffe03 0x00000000 0x30010211 0xe4100780 + 0x213fee0d 0x0fffffff 0x213ff201 0x0fffffff + 0x3003080d 0xa4000780 0x30000a01 0xa4000780 + 0x3005060d 0xc4100780 0x20000001 0x0400c780 + 0x08042001 0xe4200780 0xd8108005 0x20000780 + 0xd082020d 0x04400780 0xa400c001 0x44214780 + 0x307c07fd 0x640087c8 0xb0000001 0x0461c403 + 0x3004cffd 0x642102c8 0x08062001 0xe4200780 + 0xa0096003 0x00000000 0x10096003 0x00000100 + 0x3005d3fd 0x6420c7c8 0xa0040003 0x00000000 + 0x10040003 0x00000280 0x1000ce01 0x0423c780 + 0x40014e0d 0x00200780 0x3010060d 0xc4100780 + 0x60004e0d 0x0020c780 0x2101f201 0x00000003 + 0x2000060d 0x04010780 0x40070019 0x00000780 + 0x60060219 0x00018780 0x30100c19 0xc4100780 + 0x6006000d 0x00018780 0x3005d201 0xc4300780 + 0x3005060d 0xc4100780 0x20038000 0x20008a00 0x30020001 0xc4100780 0x2000d001 0x04200780 - 0xd00e0021 0x80c00780 0x100a5003 0x00000780 - 0x1000f821 0x0403c780 0xd8010005 0x20000782 - 0xc0001001 0x04700003 0xa0000001 0x8c004780 - 0x301f0025 0xec100780 0xd0090001 0x04008780 - 0x08002001 0xe4200780 0x1400d025 0x0423c780 - 0x1800f001 0x0423c780 0xd409c025 0x04204780 - 0xd800e001 0x04204780 0xd0090001 0x04004780 + 0xd00e0001 0x80c00780 0x90000001 0x60000780 + 0xa0000a0d 0x44004780 0xc0180001 0x03f31723 + 0xe00a0601 0x03c23d73 0x08062001 0xe4200780 + 0x30040201 0xc4100782 0x20000001 0x04014780 + 0x10028021 0x00000003 0x3001001d 0xc4100780 + 0x301f1001 0xec100780 0xd0820001 0x04400780 + 0x20000001 0x04020780 0x30010001 0xec100780 + 0x307c01fd 0x6c00c7c8 0x1006a003 0x00000280 + 0xd002000d 0x04000780 0x307c07fd 0x6c0087c8 + 0x203f800d 0x0fffffff 0xd005060d 0x04000780 + 0x20438e18 0x20008c0c 0x00020605 0xc0000780 + 0xd418d00d 0x20000780 0x1c00c00d 0x0423c780 + 0x00020c0d 0xc0000780 0xdc18d011 0x20000780 + 0xb003c019 0x60218784 0x10000019 0x00000003 + 0xa0000c1d 0x08014780 0x10001019 0x20408280 + 0xd0070dfd 0x000087d8 0xa0067003 0x00000000 + 0x10067003 0x00001280 0xd418d011 0x20000780 + 0x1000c019 0x0423c784 0xdc18d011 0x20000780 + 0x1000c00d 0x0423c784 0x0c063401 0xe4218780 + 0x04063401 0xe420c780 0xd410d011 0x20000780 + 0x1000c019 0x0423c784 0xdc10d011 0x20000780 + 0x1000c00d 0x0423c784 0x0c043401 0xe4218780 + 0x04043401 0xe420c780 0x30010001 0xec100782 + 0x307c01fd 0x6c0107d8 0x1004c003 0x00001280 + 0x30011021 0xc4100780 0x308511fd 0x6c40c7c8 + 0x10044003 0x00000280 0x1020800d 0x00000003 + 0x203f8601 0x0fffffff 0xd0050001 0x04000780 + 0x20408e08 0x20038400 0x00020005 0xc0000780 + 0xd418d00d 0x20000780 0x1c00c001 0x0423c780 + 0x0002040d 0xc0000780 0xdc18d011 0x20000780 + 0xb000c1fd 0x602187cc 0xa0085003 0x00000000 + 0x10085003 0x00000100 0xd418d011 0x20000780 + 0x1000c009 0x0423c784 0xdc18d011 0x20000780 + 0x1000c001 0x0423c784 0x0c063401 0xe4208780 + 0x04063401 0xe4200780 0xd410d011 0x20000780 + 0x1000c009 0x0423c784 0xdc10d011 0x20000780 + 0x1000c001 0x0423c784 0x0c043401 0xe4208780 + 0x04043401 0xe4200780 0x3001060d 0xec100782 + 0x307c07fd 0x6c0107c8 0x1006e003 0x00000280 + 0x3005cdfd 0x6420c7c8 0x10096003 0x00000280 + 0x6004cc09 0x60214780 0x1000ce01 0x0423c780 + 0xd810800d 0x20000780 0x00020405 0xc0000780 + 0x40014e0d 0x00200780 0xdc86c009 0x04600780 + 0x3010060d 0xc4100780 0x04023401 0xe4208780 + 0x60004e01 0x0020c780 0x3c05c009 0xec300780 + 0x20000001 0x04008780 0x04033401 0xe4200780 + 0xf0000001 0xe0000002 0x861ffe03 0x00000000 + 0x1100ec00 0x1100ee08 0x4005000d 0x00000780 + 0x6004020d 0x0000c780 0x3010060d 0xc4100780 + 0x60040019 0x0000c780 0x30010dfd 0x6c00c7c8 + 0x10000209 0x0403c780 0x30000003 0x00000280 + 0x200a8201 0x00000003 0x0002000d 0xc0000780 + 0x307c0bfd 0x640087c8 0xdc088005 0x20000780 + 0x3405c001 0x642187e0 0xa00001fd 0x0c0147d8 + 0xa00b8003 0x00000000 0x1400c00d 0x0423c780 + 0x100b7003 0x00002100 0xdc0c8005 0x20000780 + 0x2101f211 0x00000003 0x1400c001 0x0423c780 + 0x4009001d 0x00000780 0x6008021d 0x0001c780 + 0x30100e1d 0xc4100780 0x60080001 0x0001c780 + 0x20000001 0x0400c780 0x30050001 0xc4100780 + 0x20000a01 0x04000780 0x30020001 0xc4100780 + 0x2000d001 0x04200780 0xd00e0011 0x80c00780 + 0x100b8003 0x00000780 0x1000f811 0x0403c780 + 0xd8010005 0x20000782 0xc0000801 0x04700003 + 0xa0000001 0x8c004780 0x301f001d 0xec100780 + 0xd0070001 0x04008780 0x08002001 0xe4200780 + 0x1400d01d 0x0423c780 0x1800f001 0x0423c780 + 0xd407c01d 0x04204780 0xd800e001 0x04204780 + 0xd0070001 0x04004780 0x08002001 0xe4200780 + 0x1800e401 0x0423c780 0x1800ec1d 0x0423c780 + 0xd800e001 0x04204780 0xd807e81d 0x04204780 + 0xd0070001 0x04004780 0x307c07fd 0x6c0087e8 + 0x08002001 0xe4200780 0xa0000601 0x44066500 + 0x30170001 0xec102500 0x3100001d 0x04436500 + 0x1000181d 0x2440e280 0x00070205 0xc0000780 + 0x1400f601 0x0423c780 0xd400f401 0x042047e0 + 0xa0000001 0x44066680 0x30170001 0xec102680 + 0x31000001 0x04436680 0x10001801 0x2440e100 + 0x3087e3fd 0x6c60c7e8 0x3089e225 0x6c60c780 + 0x100d8021 0x00000003 0xd0821225 0x04400780 + 0x10001021 0x2440e280 0x308ae229 0x6c60c780 + 0x20401021 0x04024780 0xd0821425 0x04400780 + 0x3000ec1d 0x0421c780 0x20401021 0x04024780 + 0x20018e1d 0x00000003 0x3008ec21 0xac200780 + 0x30080e1d 0xac000780 0x308b0e21 0x8c400780 + 0x20000001 0x04020780 0x202e8001 0x0fffffff + 0x3081001d 0xac400780 0x10018001 0x00000003 + 0x307c0e1d 0x8c000780 0x30070025 0xc4000780 + 0x203f9021 0x0fffffff 0xa0001225 0x44014780 + 0x30080001 0xc4000780 0xc0041211 0x00000780 + 0x203f8021 0x0fffffff 0xa0000811 0x8c004780 + 0x30080811 0xac000780 0x30008001 0x00000003 + 0x30000811 0x8c000780 0x301f0801 0xec100780 + 0xd0000801 0x04008780 0x08002001 0xe4200780 + 0xd8010005 0x20000780 0x1400d021 0x0423c780 + 0x1800f001 0x0423c780 0xd408c021 0x04204780 + 0xd800e001 0x04204780 0xd0080001 0x04004780 0x08002001 0xe4200780 0x1800e401 0x0423c780 - 0x1800ec25 0x0423c780 0xd800e001 0x04204780 - 0xd809e825 0x04204780 0xd0090001 0x04004780 - 0x307c0ffd 0x6c0087e8 0x08002001 0xe4200780 - 0xa0000e01 0x44066500 0x30170001 0xec102500 - 0x31000025 0x04436500 0x10001825 0x2440e280 - 0x00070205 0xc0000780 0x1400f601 0x0423c780 - 0xd400f401 0x042047e0 0xa0000001 0x44066680 - 0x30170001 0xec102680 0x31000001 0x04436680 - 0x10001801 0x2440e100 0x3087e3fd 0x6c60c7e8 - 0x3089e22d 0x6c60c780 0x100d8029 0x00000003 - 0xd083162d 0x04400780 0x10001029 0x2440e280 - 0x308ae231 0x6c60c780 0x20401429 0x0402c780 - 0xd083182d 0x04400780 0x3000ec25 0x04224780 - 0x20401429 0x0402c780 0x20019225 0x00000003 - 0x300aec29 0xac200780 0x300a1225 0xac000780 - 0x308b1229 0x8c400780 0x20000001 0x04028780 - 0x202e8001 0x0fffffff 0x30810025 0xac400780 - 0x10018001 0x00000003 0x307c1225 0x8c000780 - 0x3009002d 0xc4000780 0x203f9429 0x0fffffff - 0xa000162d 0x44014780 0x300a0001 0xc4000780 - 0xc0081621 0x00000780 0x203f8029 0x0fffffff - 0xa0001021 0x8c004780 0x300a1021 0xac000780 - 0x30008001 0x00000003 0x30001021 0x8c000780 - 0x301f1001 0xec100780 0xd0001001 0x04008780 - 0x08002001 0xe4200780 0xd8010005 0x20000780 - 0x1400d029 0x0423c780 0x1800f001 0x0423c780 - 0xd40ac029 0x04204780 0xd800e001 0x04204780 - 0xd00a0001 0x04004780 0x08002001 0xe4200780 - 0x1800e401 0x0423c780 0x1800ec29 0x0423c780 - 0xd800e001 0x04204780 0xd80ae829 0x04204780 - 0xd00a0001 0x04004780 0x08002001 0xe4200780 - 0x00070205 0xc0000780 0x1400f601 0x0423c780 - 0xd400f401 0x042047e0 0xa0000001 0x44066680 - 0x30170001 0xec102680 0x31000029 0x04436680 - 0x10001829 0x2440e100 0xa011e003 0x00000000 - 0x1011e003 0x00000100 0x1000ca01 0x0423c780 - 0x4005002d 0x00000780 0x6004022d 0x0002c780 - 0x3010162d 0xc4100780 0x60040001 0x0002c780 - 0x2000002d 0x04010780 0x30071601 0xc4100780 - 0x3006162d 0xc4100780 0x200b8000 0x2100e800 - 0x20088001 0x00000003 0xd00e0025 0xa0c00780 - 0x1011e003 0x00000100 0x1000ca01 0x0423c780 - 0x40050025 0x00000780 0x60040225 0x00024780 - 0x30101225 0xc4100780 0x60040001 0x00024780 - 0x20000001 0x04010780 0x30070025 0xc4100780 - 0x30060001 0xc4100780 0x20009200 0x2100e800 - 0x30219425 0x00000003 0x200c8001 0x00000003 - 0xd00e0025 0xa0c00780 0x1011e003 0x00000100 - 0x1000ca01 0x0423c780 0x40050025 0x00000780 - 0x60040225 0x00024780 0x30101225 0xc4100780 - 0x60040001 0x00024780 0x20000001 0x04010780 - 0x30070025 0xc4100780 0x30060001 0xc4100780 - 0x20001201 0x04000780 0x20018e1d 0x00000003 - 0x2000c801 0x04200780 0xd00e001d 0xa0c00780 - 0xf0000001 0xe0000002 0xa012e003 0x00000000 - 0x1012e003 0x00001100 0x1000ca01 0x0423c780 - 0x4005001d 0x00000780 0x6004021d 0x0001c780 - 0x30100e1d 0xc4100780 0x60040001 0x0001c780 - 0x20000001 0x04010780 0x3007001d 0xc4100780 - 0x30060001 0xc4100780 0x20000e1d 0x04000780 - 0x30020a01 0xc4100780 0x2107e81c 0x20078000 - 0x20008001 0x00000007 0xd00e0021 0xa0c00780 - 0xdc00400d 0x20000782 0x20088811 0x00000003 - 0x3004cdfd 0x6c2107d8 0x10090003 0x00001280 - 0xf0000001 0xe0000001 + 0x1800ec21 0x0423c780 0xd800e001 0x04204780 + 0xd808e821 0x04204780 0xd0080001 0x04004780 + 0x08002001 0xe4200780 0x00070205 0xc0000780 + 0x1400f601 0x0423c780 0xd400f401 0x042047e0 + 0xa0000001 0x44066680 0x30170001 0xec102680 + 0x31000021 0x04436680 0x10001821 0x2440e100 + 0xa012e003 0x00000000 0x1012e003 0x00000100 + 0x1000ca01 0x0423c780 0x40014e25 0x00200780 + 0x30101225 0xc4100780 0x60004e01 0x00224780 + 0x20000025 0x04008780 0x30071201 0xc4100780 + 0x30061225 0xc4100780 0x20098000 0x2100e800 + 0x20088001 0x00000003 0xd00e001d 0xa0c00780 + 0x1012e003 0x00000100 0x1000ca01 0x0423c780 + 0x40014e1d 0x00200780 0x30100e1d 0xc4100780 + 0x60004e01 0x0021c780 0x20000001 0x04008780 + 0x3007001d 0xc4100780 0x30060001 0xc4100780 + 0x20008e00 0x2100e800 0x3021901d 0x00000003 + 0x200c8001 0x00000003 0xd00e001d 0xa0c00780 + 0x1012e003 0x00000100 0x1000ca01 0x0423c780 + 0x40014e1d 0x00200780 0x30100e1d 0xc4100780 + 0x60004e01 0x0021c780 0x20000001 0x04008780 + 0x3007001d 0xc4100780 0x30060001 0xc4100780 + 0x20000e01 0x04000780 0x2001860d 0x00000003 + 0x2000c801 0x04200780 0xd00e000d 0xa0c00780 + 0xf0000001 0xe0000002 0xa013d003 0x00000000 + 0x1013d003 0x00001100 0x1000ca01 0x0423c780 + 0x40014e0d 0x00200780 0x3010060d 0xc4100780 + 0x60004e01 0x0020c780 0x20000001 0x04008780 + 0x3007000d 0xc4100780 0x30060001 0xc4100780 + 0x2000060d 0x04000780 0x30020a01 0xc4100780 + 0x2103e80c 0x20038000 0x20008001 0x00000007 + 0xd00e0011 0xa0c00780 0xdc00400d 0x20000782 + 0x20088409 0x00000003 0x30020dfd 0x6c0107d8 + 0x100a3003 0x00001280 0xf0000001 0xe0000001 } } code { @@ -1723,229 +1730,96 @@ code { code { name = cudaComputeLPC lmem = 0 - smem = 4140 - reg = 10 - bar = 1 + smem = 760 + reg = 8 + bar = 0 const { segname = const segnum = 1 offset = 0 - bytes = 64 + bytes = 16 mem { - 0x000003ff 0x0000000f 0x7e800000 0x3c23d70a - 0x3f317218 0x00000001 0xffffffff 0x0000001f - 0x00000900 0x0000000c 0x00000480 0x00000240 - 0x00000003 0x3e800000 0x00000020 0x0000009e + 0x0000000f 0x000003ff 0x7e800000 0x3e800000 } } bincode { - 0xd0800205 0x00400780 0xa0000205 0x04000780 - 0xa0000011 0x04000780 0x30050201 0xc4100780 - 0x20000815 0x04000780 0x30810bfd 0x644107c8 - 0xa0015003 0x00000000 0x00070205 0xc0000780 - 0x10015003 0x00000280 0x1000ca01 0x0423c780 - 0x40014e09 0x00200780 0x30100409 0xc4100780 - 0x60004e01 0x00208780 0x30070009 0xc4100780 - 0x30060001 0xc4100780 0x20000409 0x04000780 - 0x30020a01 0xc4100780 0x2102e808 0x20028000 - 0xd00e0001 0x80c00780 0x00020a09 0xc0000780 - 0x08001401 0xe4200780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x3001cffd 0x6c2047c8 - 0xa004c003 0x00000000 0x10000209 0x0403c780 - 0x1004c003 0x00000280 0x200a8a01 0x00000003 - 0x00020009 0xc0000780 0x3004d3fd 0x6c2107d8 - 0x307c09fd 0x640087c8 0xa003b003 0x00000000 - 0x08002001 0xe43f0780 0x1003b003 0x00001100 - 0xa0004c0d 0x04200780 0x10004e01 0x0023c780 - 0x6000480d 0x0020c780 0x1000d201 0x0423c780 - 0x40070019 0x00000780 0x60060219 0x00018780 - 0x30100c19 0xc4100780 0x60060019 0x00018780 - 0x2101ee0d 0x00000003 0x20000c01 0x04010780 - 0x40010c1d 0x00000780 0x60000e1d 0x0001c780 - 0x30100e1d 0xc4100780 0x60000c1d 0x0001c780 - 0x20000e1d 0x04008780 0x30020e1d 0xc4100780 - 0x3007060d 0xc4100780 0x2106f218 0x2107ec1c - 0xd00e0e21 0x80c00780 0x20208001 0x00000003 - 0xb800e021 0x00220780 0x300601fd 0x6c0047e8 - 0x08002001 0xe4220780 0x20000e1d 0x0400c780 - 0x10034003 0x00002280 0x1800f001 0x0423c782 - 0xd801000d 0x20000780 0xb800e001 0x00200780 - 0xbd006000 0xbd007000 0x08002001 0xe4200780 - 0x1800e401 0x0423c780 0xb800e001 0x00200780 - 0xb800e801 0x00200780 0xb800ec01 0x00200780 - 0x08002001 0xe4200780 0x1800e201 0x0423c680 - 0x0002040d 0xc0000680 0xb800e001 0x00200680 - 0x0c06d401 0xe4200680 0x20088409 0x00000003 - 0x3002cffd 0x6c2187e8 0x1001f003 0x00002280 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x307c03fd 0x640147c8 0xa00f1003 0x00000000 - 0x100f1003 0x00000280 0x00020a09 0xc0000780 - 0xd81b580d 0x20000780 0x1d00e008 0x1d00e000 - 0x08069401 0xe4208780 0xd01b500d 0x20000780 - 0x08065401 0xe43f0780 0x307ccffd 0x6c20c7c8 - 0x1c00c00d 0x0423c780 0x1008a003 0x00000280 - 0x10288009 0x00000003 0x0000040d 0xc0000780 - 0x213fee1d 0x0fffffff 0x1000f809 0x0403c780 - 0xd01a5011 0x20000780 0xb08207fd 0x605107c8 - 0x10000619 0x0403c780 0xa000c021 0xe4204784 - 0xc08d1021 0x00400680 0xc08d0c19 0x00400680 - 0x90000c24 0x20428e18 0xc0091021 0x00000780 - 0xd01a5011 0x20000780 0x30060bfd 0x6c0187c8 - 0xe008c00d 0x0020c784 0xd81a5811 0x20000500 - 0x1000c019 0x0423c504 0xe0001019 0x00018500 - 0xe008c001 0x00200504 0x08069401 0xe4218500 - 0x20400419 0x04014780 0x00020c11 0xc0000780 - 0xd0194811 0x20000784 0x40050825 0x00000780 - 0x300505fd 0x6c00c7c8 0xc008c019 0x00200784 - 0x60040a25 0x00024780 0x1000f819 0x0403c280 - 0x30101225 0xc4100780 0x300505fd 0x6c0147c8 - 0xb0000c21 0x00020780 0x60040825 0x00024780 - 0x10000c21 0x0403c280 0xd8195011 0x20000780 - 0x20000425 0x04024780 0xb000c019 0x00220784 - 0x30011221 0xec100780 0x08065401 0xe4218780 - 0x20000a19 0x04020780 0xa000c021 0xe4204784 - 0x00020c11 0xc0000780 0x00023401 0xe4220784 - 0x20018409 0x00000003 0x0c070201 0xe420c780 - 0x3002cffd 0x6c2147c8 0xdc00080d 0x20000780 - 0x1005e003 0x00000280 0x1008b003 0x00000780 - 0x213fee1d 0x0fffffff 0xa0000a09 0x44014780 - 0x3005cffd 0x6c20c7c8 0x10000401 0x0403c780 - 0xa0000e01 0x44014280 0x1000ce0d 0x0423c780 - 0x08079601 0xe4200780 0x3003d1fd 0x6c2187c8 - 0x0807d601 0xe43f0780 0x100f1003 0x00000280 - 0x3005cffd 0x6c20c7c8 0xa00a2003 0x00000000 - 0x1009d003 0x00000280 0xd81c580d 0x20000780 - 0x1c00c001 0x0423c780 0x90000001 0x60000780 - 0xc0180001 0x03f31723 0xe00a0401 0x03c23d73 - 0x100a2003 0x00000780 0xd01c580d 0x20000780 - 0x1c00c009 0x0423c780 0x10008001 0x03f80003 - 0x90000409 0x60000780 0xe0180401 0x03f31723 - 0x08071601 0xe4200782 0xd01c580d 0x20000780 - 0x1c00c009 0x0423c780 0x10008001 0x03f80003 - 0x90000409 0x60000780 0xe0180401 0x03f31723 - 0x08075601 0xe4200780 0x30010819 0xc4100780 - 0x1002801d 0x00000003 0x301f0e01 0xec100780 - 0xd0850001 0x04400780 0x20000001 0x0401c780 - 0x30010001 0xec100780 0x307c01fd 0x6c00c7c8 - 0x100d2003 0x00000280 0xd0040009 0x04000780 - 0x307c05fd 0x640087c8 0x203f8009 0x0fffffff - 0xd0040409 0x04000780 0x20428c08 0x2000840c - 0x0002060d 0xc0000780 0xdc1c5811 0x20000780 - 0x00020409 0xc0000780 0x1000c009 0x0423c784 - 0xd81c5811 0x20000780 0xb002c00d 0x60218784 - 0x10000011 0x00000003 0xa0000615 0x08014780 - 0x10001811 0x20408280 0xd00509fd 0x000087d8 - 0xa00cf003 0x00000000 0x100cf003 0x00001280 - 0xdc1c5811 0x20000780 0x1000c00d 0x0423c784 - 0xd81c5811 0x20000780 0x1000c009 0x0423c784 - 0x08071601 0xe420c780 0x0c071601 0xe4208780 - 0xd81e5811 0x20000780 0xa000c00d 0x8c264784 - 0xdc1e5811 0x20000780 0x1000c009 0x0423c784 - 0x08079601 0xe4208780 0xa0000609 0x44014780 - 0x0c079601 0xe4208780 0x30010001 0xec100782 - 0x307c01fd 0x6c0107d8 0x100b3003 0x00001280 - 0x30010e1d 0xc4100780 0x30870ffd 0x6c40c7c8 - 0x100ab003 0x00000280 0x1010800d 0x00000003 - 0x203f8601 0x0fffffff 0xd0040001 0x04000780 - 0x20408c00 0x20038008 0x0002040d 0xc0000780 - 0xdc1c5811 0x20000780 0x00020009 0xc0000780 - 0x1000c001 0x0423c784 0xd81c5811 0x20000780 - 0xb000c1fd 0x602187cc 0xa00ee003 0x00000000 - 0x100ee003 0x00000100 0xdc1c5811 0x20000780 - 0x1000c009 0x0423c784 0xd81c5811 0x20000780 - 0x1000c001 0x0423c784 0x08071601 0xe4208780 - 0x0c071601 0xe4200780 0xd81e5811 0x20000780 - 0xa000c009 0x8c264784 0xdc1e5811 0x20000780 - 0x1000c001 0x0423c784 0x08079601 0xe4200780 - 0xa0000401 0x44014780 0x0c079601 0xe4200780 - 0x3001060d 0xec100782 0x307c07fd 0x6c0107c8 - 0x100d6003 0x00000280 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x3001d1fd 0x6c20c7c8 - 0x10000201 0x0403c780 0x30000003 0x00000280 - 0x200a8a09 0x00000003 0x200a8205 0x00000003 - 0x0002040d 0xc0000780 0x00020209 0xc0000780 - 0x307c09fd 0x640087c8 0xd81e0811 0x20000780 - 0xa000c00d 0x8c264784 0x30040605 0x640187e0 - 0xa00003fd 0x0c0147d8 0xa010d003 0x00000000 - 0x1010c003 0x00002100 0x20018605 0x00000003 - 0x40030c09 0x00000780 0x60020e09 0x00008780 - 0x30100409 0xc4100780 0x60020c05 0x00008780 - 0x30010205 0xec100780 0x20018604 0x20448204 - 0x00020211 0xc0000780 0xd008d011 0x20000784 - 0x1000c005 0x0423c784 0x1010d003 0x00000780 - 0x1000f805 0x0403c780 0xdc010011 0x20000782 - 0xc0000209 0x04700003 0xa0000409 0x8c004780 - 0x301f0415 0xec100780 0xd0050409 0x04008780 - 0x0c002001 0xe4208780 0x1000d015 0x0423c784 - 0x1c00f009 0x0423c780 0xd005c015 0x04204784 - 0xdc02e009 0x04204780 0xd0050409 0x04004780 - 0x0c002001 0xe4208780 0x1c00e409 0x0423c780 - 0x1c00ec15 0x0423c780 0xdc02e009 0x04204780 - 0xdc05e815 0x04204780 0xd0050409 0x04004780 - 0x307c07fd 0x6c0087e8 0x0c002001 0xe4208780 - 0xa0000609 0x44066500 0x30170409 0xec102500 - 0x31000415 0x0443e500 0x10001c15 0x2440e280 - 0x1400f609 0x0423c780 0xd402f409 0x042047e0 - 0xa0000409 0x44066680 0x30170409 0xec102680 - 0x31000409 0x0443e680 0x10001c09 0x2440e100 - 0x3088e3fd 0x6c60c7e8 0x308ae21d 0x6c60c780 - 0x100d8019 0x00000003 0xd0850e1d 0x04400780 - 0x10001219 0x2440e280 0x308be221 0x6c60c780 - 0x20400c19 0x0401c780 0xd085101d 0x04400780 - 0x3000ec15 0x04214780 0x20400c19 0x0401c780 - 0x20018a15 0x00000003 0x3006ec19 0xac200780 - 0x30060a15 0xac000780 0x308c0a15 0x8c400780 - 0x20000409 0x04014780 0x202e8409 0x0fffffff - 0x30810419 0xac400780 0x10018009 0x00000003 - 0x307c0c19 0x8c000780 0x3006041d 0xc4000780 - 0x203f8a21 0x0fffffff 0xa0000e15 0x44014780 - 0x30080409 0xc4000780 0xc0010a05 0x00000780 - 0x203f8415 0x0fffffff 0xa0000205 0x8c004780 - 0x30050205 0xac000780 0x30008409 0x00000003 - 0x30020215 0x8c000780 0x301f0a05 0xec100780 - 0xd0010a05 0x04008780 0x0c002001 0xe4204780 - 0xdc010011 0x20000780 0x1000d009 0x0423c784 - 0x1c00f005 0x0423c780 0xd002c009 0x04204784 - 0xdc01e005 0x04204780 0xd0020205 0x04004780 - 0x0c002001 0xe4204780 0x1c00e405 0x0423c780 - 0x1c00ec09 0x0423c780 0xdc01e005 0x04204780 - 0xdc02e809 0x04204780 0xd0020205 0x04004780 - 0x0c002001 0xe4204780 0x1400f605 0x0423c780 - 0xd401f405 0x042047e0 0xa0000205 0x44066680 - 0x30170205 0xec102680 0x3100021d 0x0443e680 - 0x10001c1d 0x2440e100 0xa0187003 0x00000000 - 0x10187003 0x00000100 0x1100f008 0x1100ea04 - 0x41052c20 0x41032e24 0x30101021 0xc4100780 - 0x30101225 0xc4100780 0x60044c09 0x00220780 - 0x60024e05 0x00224780 0x20018404 0x20018004 - 0x30070209 0xc4100780 0x30060205 0xc4100780 - 0x20018404 0x2101e804 0x20088205 0x00000003 - 0xd00e0219 0xa0c00780 0x10187003 0x00000100 - 0x1100f008 0x1100ea04 0x41052c18 0x41032e20 - 0x30100c19 0xc4100780 0x30101021 0xc4100780 - 0x60044c09 0x00218780 0x60024e05 0x00220780 - 0x20018404 0x20018004 0x30070209 0xc4100780 - 0x30060205 0xc4100780 0x20018404 0x2101e808 - 0x30218e05 0x00000003 0x200c8409 0x00000003 - 0xd00e0405 0xa0c00780 0x10187003 0x00000100 - 0x1100f008 0x1100ea04 0x41052c18 0x41032e1c - 0x30100c19 0xc4100780 0x30100e1d 0xc4100780 - 0x60044c09 0x00218780 0x60024e05 0x0021c780 - 0x20018404 0x20018004 0x30070209 0xc4100780 - 0x30060205 0xc4100780 0x20000409 0x04004780 - 0x20018605 0x00000003 0x2000c809 0x04208780 - 0xd00e0405 0xa0c00780 0xf0000001 0xe0000002 - 0xa0198003 0x00000000 0x10198003 0x00001100 - 0x1100f008 0x1100ea04 0x41052c0c 0x41032e18 - 0x3010060d 0xc4100780 0x30100c19 0xc4100780 - 0x60044c09 0x0020c780 0x60024e05 0x00218780 - 0x20018404 0x20018004 0x30070209 0xc4100780 - 0x30060205 0xc4100780 0x20000405 0x04004780 - 0x30020809 0xc4100780 0x2101e804 0x20018404 - 0x20008205 0x00000007 0xd00e0215 0xa0c00780 - 0xd8004009 0x20000782 0x20088001 0x00000003 - 0x3000d1fd 0x6c2107d8 0x100fb003 0x00001280 - 0xf0000001 0xe0000001 + 0x10000005 0x0403c780 0xa0000401 0x04000780 + 0x308001fd 0x644107c8 0xa0011003 0x00000000 + 0x10011003 0x00000280 0x1000ca09 0x0423c780 + 0x40054e0d 0x00200780 0x3010060d 0xc4100780 + 0x60044e09 0x0020c780 0x3007040d 0xc4100780 + 0x30060409 0xc4100780 0x2000060d 0x04008780 + 0x30020009 0xc4100780 0x2103e80c 0x20038408 + 0xd00e0409 0x80c00780 0x00020005 0xc0000780 + 0x04001601 0xe4208780 0x307c01fd 0x6c0147ca + 0xa0027003 0x00000000 0x10027003 0x00000280 + 0x1000d209 0x0423c780 0x40054e0d 0x00200780 + 0x3010060d 0xc4100780 0x60044e09 0x0020c780 + 0xa0004c0d 0x04200780 0x2000040d 0x0400c780 + 0x2101ee09 0x00000003 0x40070811 0x00000780 + 0x60060a11 0x00010780 0x30100811 0xc4100780 + 0x6006080d 0x00010780 0x1000d409 0x0423c780 + 0x40070811 0x00000780 0x60060a15 0x00010780 + 0x30050611 0xc4100780 0x30100a15 0xc4100780 + 0x00017801 0xe4210780 0x60060809 0x00014780 + 0x00017a01 0xe4208780 0x307ccffd 0x6c2047ca + 0x10054003 0x00000280 0x200b8009 0x00000003 + 0x00020405 0xc0000780 0x307c01fd 0x640087c8 + 0x3000d5fd 0x6c2107d8 0x1000f811 0x0403c780 + 0x2101ee0d 0x00000003 0xa0043003 0x00000000 + 0x04002001 0xe43f0780 0x10000009 0x0403c780 + 0x10043003 0x00001100 0x40010c15 0x00000780 + 0x60000e15 0x00014780 0x30100a19 0xc4100780 + 0x30050615 0xc4100780 0x60000c19 0x00018780 + 0xd005e809 0x20000780 0x2906e01c 0x2007881c + 0x30020e1d 0xc4100780 0x2000cc1d 0x0421c780 + 0xd00e0e1d 0x80c00780 0x20208409 0x00000003 + 0xb400e01d 0x0021c780 0x3002d5fd 0x6c2107e8 + 0x04002001 0xe421c780 0x20000c19 0x04014780 + 0x10038003 0x00002280 0x1400f009 0x0423c782 + 0xd4010009 0x20000780 0xb400e009 0x00208780 + 0xb9026008 0xb9027008 0x04002001 0xe4208780 + 0x1400e409 0x0423c780 0xb400e009 0x00208780 + 0xb400e809 0x00208780 0xb400ec09 0x00208780 + 0x04002001 0xe4208780 0x1400e209 0x0423c680 + 0x00020809 0xc0000680 0xb400e009 0x00208680 + 0x08013601 0xe4208680 0x20018811 0x00000003 + 0x300407fd 0x6c0147e8 0x1002f003 0x00002280 + 0xd0820609 0x00400780 0xa00005fd 0x040007c8 + 0x30000003 0x00000280 0x00020005 0xc0000780 + 0xd404e009 0x20000780 0x1900e008 0x1900e004 + 0x0400b601 0xe4208780 0xd004d809 0x20000780 + 0x04007601 0xe43f0780 0x307ccffd 0x6c20c7c8 + 0x1800c009 0x0423c780 0x10090003 0x00000280 + 0x307c01fd 0x640087c8 0x213fee11 0x0fffffff + 0x1000f815 0x0403c780 0xd002d809 0x20000780 + 0xb08205fd 0x605107d8 0x1000040d 0x0403c780 + 0xa800c019 0xe4204780 0xc0830c19 0x00401680 + 0xc083060d 0x00401680 0x9000060c 0x2045881c + 0xc0030c19 0x00000780 0xd002d809 0x20000780 + 0x300701fd 0x640187d8 0xe806c009 0x00208780 + 0xd402e009 0x20001500 0x1800c00d 0x0423d500 + 0xe0010c0d 0x0000d500 0xe806c005 0x00205500 + 0x0400b601 0xe420d500 0x00020a09 0xc0000680 + 0x0800f601 0xe4208680 0x20400a0d 0x04000780 + 0x00020609 0xc0000780 0xd801d00d 0x20000780 + 0x30000bfd 0x6400c7d8 0xcc06c01d 0x00200780 + 0x1000f81d 0x0403d280 0x30000bfd 0x640147d8 + 0xb0000e0d 0x00018780 0x10000e0d 0x0403d280 + 0xd401d80d 0x20000780 0xbc00c00d 0x0020c780 + 0x30000bfd 0x640047d8 0x04007601 0xe420c780 + 0xa008c003 0x00000000 0x1008c003 0x00001280 + 0xd005e00d 0x20000780 0x30050a0d 0xc4100780 + 0x2d03e00c 0x2003800c 0xd801d809 0x20000780 + 0x30020619 0xc4100780 0xa800c00d 0xe4204780 + 0x2000d019 0x04218780 0xd00e0c0d 0xa0c00780 + 0xf0000001 0xe0000002 0x20018a15 0x00000003 + 0x3005cffd 0x6c2147d8 0x10063003 0x00001280 + 0x3000cffd 0x6420c7c8 0x30000003 0x00000280 + 0xd005e009 0x20000780 0x3005ce05 0xc4300780 + 0x2901e004 0x20018000 0xd403d805 0x20000780 + 0x30020005 0xc4100780 0x1500e000 0x2101f004 + 0xd00e0201 0xa0c00781 } } code {