diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs index d9c907f..c84368b 100644 --- a/CUETools.FlaCuda/FlaCudaWriter.cs +++ b/CUETools.FlaCuda/FlaCudaWriter.cs @@ -1114,7 +1114,7 @@ namespace CUETools.Codecs.FlaCuda int calcPartitionPartCount = (calcPartitionPartSize >= 128) ? 1 : (256 / calcPartitionPartSize); CUfunction cudaChannelDecorr = channels == 2 ? (channelsCount == 4 ? task.cudaStereoDecorr : task.cudaChannelDecorr2) : task.cudaChannelDecorr; - CUfunction cudaCalcPartition = calcPartitionPartSize >= 128 ? task.cudaCalcLargePartition : task.cudaCalcPartition; + CUfunction cudaCalcPartition = calcPartitionPartSize >= 128 ? task.cudaCalcLargePartition : calcPartitionPartSize == 16 && task.frameSize >= 256 ? task.cudaCalcPartition16 : task.cudaCalcPartition; cuda.SetParameter(cudaChannelDecorr, 0 * sizeof(uint), (uint)task.cudaSamples.Pointer); cuda.SetParameter(cudaChannelDecorr, 1 * sizeof(uint), (uint)task.cudaSamplesBytes.Pointer); @@ -1203,20 +1203,20 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(task.cudaSumPartition, 0, (uint)task.cudaPartitions.Pointer); cuda.SetParameter(task.cudaSumPartition, 1 * sizeof(uint), (uint)max_porder); cuda.SetParameterSize(task.cudaSumPartition, 2U * sizeof(uint)); - cuda.SetFunctionBlockShape(task.cudaSumPartition, Math.Max(64, 1 << max_porder), 1, 1); + cuda.SetFunctionBlockShape(task.cudaSumPartition, Math.Max(32, 1 << (max_porder - 1)), 1, 1); cuda.SetParameter(task.cudaFindRiceParameter, 0, (uint)task.cudaRiceParams.Pointer); cuda.SetParameter(task.cudaFindRiceParameter, 1 * sizeof(uint), (uint)task.cudaPartitions.Pointer); cuda.SetParameter(task.cudaFindRiceParameter, 2 * sizeof(uint), (uint)max_porder); cuda.SetParameterSize(task.cudaFindRiceParameter, 3U * sizeof(uint)); - cuda.SetFunctionBlockShape(task.cudaFindRiceParameter, 8, 32, 1); + cuda.SetFunctionBlockShape(task.cudaFindRiceParameter, 32, 8, 1); cuda.SetParameter(task.cudaFindPartitionOrder, 0, (uint)task.cudaBestRiceParams.Pointer); cuda.SetParameter(task.cudaFindPartitionOrder, 1 * sizeof(uint), (uint)task.cudaBestResidualTasks.Pointer); cuda.SetParameter(task.cudaFindPartitionOrder, 2 * sizeof(uint), (uint)task.cudaRiceParams.Pointer); cuda.SetParameter(task.cudaFindPartitionOrder, 3 * sizeof(uint), (uint)max_porder); cuda.SetParameterSize(task.cudaFindPartitionOrder, 4U * sizeof(uint)); - cuda.SetFunctionBlockShape(task.cudaFindPartitionOrder, 256, 1, 1); + cuda.SetFunctionBlockShape(task.cudaFindPartitionOrder, 256, 1, 1); // issue work to the GPU cuda.LaunchAsync(cudaChannelDecorr, (task.frameCount * task.frameSize + 255) / 256, channels == 2 ? 1 : channels, task.stream); @@ -1238,7 +1238,7 @@ namespace CUETools.Codecs.FlaCuda if (!encode_on_cpu) { int bsz = calcPartitionPartCount * calcPartitionPartSize; - if (cudaCalcPartition.Pointer != task.cudaCalcPartition.Pointer) + if (cudaCalcPartition.Pointer == task.cudaCalcLargePartition.Pointer) cuda.LaunchAsync(task.cudaEncodeResidual, residualPartCount, channels * task.frameCount, task.stream); cuda.LaunchAsync(cudaCalcPartition, (task.frameSize + bsz - 1) / bsz, channels * task.frameCount, task.stream); if (max_porder > 0) @@ -1936,6 +1936,7 @@ namespace CUETools.Codecs.FlaCuda public CUfunction cudaCopyBestMethodStereo; public CUfunction cudaEncodeResidual; public CUfunction cudaCalcPartition; + public CUfunction cudaCalcPartition16; public CUfunction cudaCalcLargePartition; public CUfunction cudaSumPartition; public CUfunction cudaFindRiceParameter; @@ -2035,6 +2036,7 @@ namespace CUETools.Codecs.FlaCuda cudaCopyBestMethodStereo = cuda.GetModuleFunction("cudaCopyBestMethodStereo"); cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual"); cudaCalcPartition = cuda.GetModuleFunction("cudaCalcPartition"); + cudaCalcPartition16 = cuda.GetModuleFunction("cudaCalcPartition16"); cudaCalcLargePartition = cuda.GetModuleFunction("cudaCalcLargePartition"); cudaSumPartition = cuda.GetModuleFunction("cudaSumPartition"); cudaFindRiceParameter = cuda.GetModuleFunction("cudaFindRiceParameter"); diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu index 16ff04f..593d8f2 100644 --- a/CUETools.FlaCuda/flacuda.cu +++ b/CUETools.FlaCuda/flacuda.cu @@ -859,42 +859,51 @@ extern "C" __global__ void cudaCalcPartition( else s = 0; - __syncthreads(); // convert to unsigned - shared.data[tid] = min(0xfffff, (s << 1) ^ (s >> 31)); + s = min(0xfffff, (s << 1) ^ (s >> 31)); + + //__syncthreads(); + //shared.data[tid] = s; + //__syncthreads(); + + //shared.data[tid] = (shared.data[tid] & (0x0000ffff << (tid & 16))) | (((shared.data[tid ^ 16] & (0x0000ffff << (tid & 16))) << (~tid & 16)) >> (tid & 16)); + //shared.data[tid] = (shared.data[tid] & (0x00ff00ff << (tid & 8))) | (((shared.data[tid ^ 8] & (0x00ff00ff << (tid & 8))) << (~tid & 8)) >> (tid & 8)); + //shared.data[tid] = (shared.data[tid] & (0x0f0f0f0f << (tid & 4))) | (((shared.data[tid ^ 4] & (0x0f0f0f0f << (tid & 4))) << (~tid & 4)) >> (tid & 4)); + //shared.data[tid] = (shared.data[tid] & (0x33333333 << (tid & 2))) | (((shared.data[tid ^ 2] & (0x33333333 << (tid & 2))) << (~tid & 2)) >> (tid & 2)); + //shared.data[tid] = (shared.data[tid] & (0x55555555 << (tid & 1))) | (((shared.data[tid ^ 1] & (0x55555555 << (tid & 1))) << (~tid & 1)) >> (tid & 1)); + //shared.data[tid] = __popc(shared.data[tid]); + + __syncthreads(); + shared.data[tid + (tid / psize)] = s; + //shared.data[tid] = s; __syncthreads(); - s = (psize - shared.task.residualOrder * (threadIdx.y + blockIdx.x == 0)) * (threadIdx.x + 1); - int dpos = threadIdx.y * psize; - // calc number of unary bits for each residual part with each rice paramater + s = (psize - shared.task.residualOrder * (threadIdx.x + blockIdx.x == 0)) * (threadIdx.y + 1); + int dpos = __mul24(threadIdx.x, psize + 1); + //int dpos = __mul24(threadIdx.x, psize); + // calc number of unary bits for part threadIdx.x with rice paramater threadIdx.y #pragma unroll 0 for (int i = 0; i < psize; i++) - // for part (threadIdx.y) with this rice paramater (threadIdx.x) - s += shared.data[dpos + i] >> threadIdx.x; - __syncthreads(); - shared.data[tid] = s; - __syncthreads(); + s += shared.data[dpos + i] >> threadIdx.y; - // output length (transposed: k is now threadIdx.y) + // output length const int pos = (15 << (max_porder + 1)) * blockIdx.y + (threadIdx.y << (max_porder + 1)); if (threadIdx.y <= 14 && threadIdx.x < parts) - partition_lengths[pos + blockIdx.x * parts_per_block + threadIdx.x] = shared.data[threadIdx.y + (threadIdx.x << 4)]; + partition_lengths[pos + blockIdx.x * parts_per_block + threadIdx.x] = s; } -extern "C" __global__ void cudaCalcPartition1( +extern "C" __global__ void cudaCalcPartition16( int* partition_lengths, int* residual, int* samples, encodeResidualTaskStruct *tasks, int max_porder, // <= 8 - int psize, // == (shared.task.blocksize >> max_porder), < 256 - int parts_per_block // == 256 / psize, > 0, <= 16 + int psize, // == 16 + int parts_per_block // == 16 ) { __shared__ struct { - int data[256]; - int length[256]; - int plen[256]; + int data[256+32]; encodeResidualTaskStruct task; } shared; const int tid = threadIdx.x + (threadIdx.y << 4); @@ -902,32 +911,46 @@ extern "C" __global__ void cudaCalcPartition1( ((int*)&shared.task)[tid] = ((int*)(&tasks[blockIdx.y]))[tid]; __syncthreads(); - const int parts = min(parts_per_block, (1 << max_porder) - blockIdx.x * parts_per_block); + const int offs = (blockIdx.x << 8) + tid; + + // fetch samples + if (tid < 32) shared.data[tid] = min(offs, tid + shared.task.residualOrder) >= 32 ? samples[shared.task.samplesOffs + offs - 32] >> shared.task.wbits : 0; + shared.data[32 + tid] = samples[shared.task.samplesOffs + offs] >> shared.task.wbits; + __syncthreads(); + + // compute residual + int s = 0; + for (int c = -shared.task.residualOrder; c < 0; c++) + s += __mul24(shared.data[32 + tid + c], shared.task.coefs[shared.task.residualOrder + c]); + s = shared.data[32 + tid] - (s >> shared.task.shift); + + if (offs >= shared.task.residualOrder) + residual[shared.task.residualOffs + offs] = s; + else + s = 0; - // fetch residual - int offs = blockIdx.x * psize * parts_per_block + tid; - int s = (offs >= shared.task.residualOrder && tid < parts * psize) ? residual[shared.task.residualOffs + offs] : 0; // convert to unsigned - shared.data[tid] = min(0xfffff, (s << 1) ^ (s >> 31)); + s = min(0xfffff, (s << 1) ^ (s >> 31)); + __syncthreads(); + shared.data[tid + threadIdx.y] = s; __syncthreads(); - for (int k = 0; k < 15; k++) - { - shared.length[tid] = 0; - // calc number of unary bits for each residual part with each rice paramater - // for part (threadIdx.y) with rice paramater k - for (int i = 0; i < psize; i += 16) - shared.length[tid] += shared.data[threadIdx.y * psize + i + threadIdx.x] >> k; // * (i + threadIdx.x < psize) - SUM16(shared.length,tid,+=); - if (threadIdx.x == 0 && threadIdx.y < parts) - shared.plen[(k << 4) + threadIdx.y] = shared.length[tid]; - } - __syncthreads(); + // calc number of unary bits for part threadIdx.x with rice paramater threadIdx.y + int dpos = __mul24(threadIdx.x, 17); + s = + (shared.data[dpos + 0] >> threadIdx.y) + (shared.data[dpos + 1] >> threadIdx.y) + + (shared.data[dpos + 2] >> threadIdx.y) + (shared.data[dpos + 3] >> threadIdx.y) + + (shared.data[dpos + 4] >> threadIdx.y) + (shared.data[dpos + 5] >> threadIdx.y) + + (shared.data[dpos + 6] >> threadIdx.y) + (shared.data[dpos + 7] >> threadIdx.y) + + (shared.data[dpos + 8] >> threadIdx.y) + (shared.data[dpos + 9] >> threadIdx.y) + + (shared.data[dpos + 10] >> threadIdx.y) + (shared.data[dpos + 11] >> threadIdx.y) + + (shared.data[dpos + 12] >> threadIdx.y) + (shared.data[dpos + 13] >> threadIdx.y) + + (shared.data[dpos + 14] >> threadIdx.y) + (shared.data[dpos + 15] >> threadIdx.y); + // output length - const int pos = blockIdx.x * parts_per_block + threadIdx.x; - const int len1 = (psize - shared.task.residualOrder * (pos == 0)) * (threadIdx.y + 1); - if (threadIdx.y <= 14 && threadIdx.x < parts) - partition_lengths[((threadIdx.y + 15 * blockIdx.y) << (max_porder + 1)) + pos] = shared.plen[tid] + len1; + const int pos = ((15 * blockIdx.y + threadIdx.y) << (max_porder + 1)) + (blockIdx.x << 4) + threadIdx.x; + if (threadIdx.y <= 14) + partition_lengths[pos] = s + (16 - shared.task.residualOrder * (threadIdx.x + blockIdx.x == 0)) * (threadIdx.y + 1); } extern "C" __global__ void cudaCalcLargePartition( @@ -977,30 +1000,45 @@ extern "C" __global__ void cudaCalcLargePartition( } // Sums partition lengths for a certain k == blockIdx.x -// Requires 256 threads +// Requires 128 threads extern "C" __global__ void cudaSumPartition( int* partition_lengths, int max_porder ) { __shared__ struct { - int data[512]; // max_porder <= 8, data length <= 1 << 9. + volatile int data[512+32]; // max_porder <= 8, data length <= 1 << 9. } shared; const int pos = (15 << (max_porder + 1)) * blockIdx.y + (blockIdx.x << (max_porder + 1)); // fetch partition lengths shared.data[threadIdx.x] = threadIdx.x < (1 << max_porder) ? partition_lengths[pos + threadIdx.x] : 0; + shared.data[blockDim.x + threadIdx.x] = blockDim.x + threadIdx.x < (1 << max_porder) ? partition_lengths[pos + blockDim.x + threadIdx.x] : 0; __syncthreads(); - for (int porder = max_porder - 1; porder >= 0; porder--) + + int in_pos = (threadIdx.x << 1); + int out_pos = (1 << max_porder) + threadIdx.x; + int bs; + for (bs = 1 << (max_porder - 1); bs > 32; bs >>= 1) { - const int in_pos = (2 << max_porder) - (4 << porder); - const int out_pos = (2 << max_porder) - (2 << porder); - if (threadIdx.x < (1 << porder)) shared.data[out_pos + threadIdx.x] = shared.data[in_pos + (threadIdx.x << 1)] + shared.data[in_pos + (threadIdx.x << 1) + 1]; + if (threadIdx.x < bs) shared.data[out_pos] = shared.data[in_pos] + shared.data[in_pos + 1]; + in_pos += bs << 1; + out_pos += bs; __syncthreads(); } + if (threadIdx.x < 32) + for (; bs > 0; bs >>= 1) + { + shared.data[out_pos] = shared.data[in_pos] + shared.data[in_pos + 1]; + in_pos += bs << 1; + out_pos += bs; + } + __syncthreads(); if (threadIdx.x < (1 << max_porder)) partition_lengths[pos + (1 << max_porder) + threadIdx.x] = shared.data[(1 << max_porder) + threadIdx.x]; + if (blockDim.x + threadIdx.x < (1 << max_porder)) + partition_lengths[pos + (1 << max_porder) + blockDim.x + threadIdx.x] = shared.data[(1 << max_porder) + blockDim.x + threadIdx.x]; } // Finds optimal rice parameter for up to 16 partitions at a time. @@ -1014,45 +1052,36 @@ extern "C" __global__ void cudaFindRiceParameter( __shared__ struct { volatile int length[256]; volatile int index[256]; - volatile int outlen[32]; - volatile int outidx[32]; } shared; - const int tid = threadIdx.x + (threadIdx.y << 3); + const int tid = threadIdx.x + (threadIdx.y << 5); const int parts = min(32, 2 << max_porder); - const int pos = (15 << (max_porder + 1)) * blockIdx.y + ((tid >> 5) << (max_porder + 1)); + const int pos = (15 << (max_porder + 1)) * blockIdx.y + (threadIdx.y << (max_porder + 1)); // read length for 32 partitions - shared.index[tid] = ((tid & 31) < parts) ? partition_lengths[pos + blockIdx.x * 32 + (tid & 31)] : 0xffffff; - shared.length[tid] = ((tid >> 5) + 8 <= 14 && (tid & 31) < parts) ? partition_lengths[pos + (8 << (max_porder + 1)) + blockIdx.x * 32 + (tid & 31)] : 0xffffff; - __syncthreads(); - // transpose - int l1 = shared.index[threadIdx.y + (threadIdx.x << 5)]; - int l2 = shared.length[threadIdx.y + (threadIdx.x << 5)]; - __syncthreads(); + int l1 = (threadIdx.x < parts) ? partition_lengths[pos + blockIdx.x * 32 + threadIdx.x] : 0xffffff; + int l2 = (threadIdx.y + 8 <= 14 && threadIdx.x < parts) ? partition_lengths[pos + (8 << (max_porder + 1)) + blockIdx.x * 32 + threadIdx.x] : 0xffffff; // find best rice parameter - shared.index[tid] = threadIdx.x + ((l2 < l1) << 3); + shared.index[tid] = threadIdx.y + ((l2 < l1) << 3); shared.length[tid] = l1 = min(l1, l2); -#pragma unroll 2 - for (int sh = 2; sh > 0; sh --) - if (threadIdx.x < (1 << sh)) + __syncthreads(); +#pragma unroll 3 + for (int sh = 7; sh >= 5; sh --) + { + if (tid < (1 << sh)) { l2 = shared.length[tid + (1 << sh)]; shared.index[tid] = shared.index[tid + ((l2 < l1) << sh)]; shared.length[tid] = l1 = min(l1, l2); } - if (threadIdx.x == 0 && threadIdx.y < parts) - { - l2 = shared.length[tid + 1]; - shared.outidx[threadIdx.y] = shared.index[tid + (l2 < l1)]; - shared.outlen[threadIdx.y] = min(l1, l2); + __syncthreads(); } - __syncthreads(); - // output rice parameter if (tid < parts) - rice_parameters[(blockIdx.y << (max_porder + 2)) + blockIdx.x * parts + tid] = shared.outidx[tid]; - // output length - if (tid < parts) - rice_parameters[(blockIdx.y << (max_porder + 2)) + (1 << (max_porder + 1)) + blockIdx.x * parts + tid] = shared.outlen[tid]; + { + // output rice parameter + rice_parameters[(blockIdx.y << (max_porder + 2)) + blockIdx.x * parts + tid] = shared.index[tid]; + // output length + rice_parameters[(blockIdx.y << (max_porder + 2)) + (1 << (max_porder + 1)) + blockIdx.x * parts + tid] = shared.length[tid]; + } } extern "C" __global__ void cudaFindPartitionOrder( diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin index 00ba6d8..9639b7c 100644 --- a/CUETools.FlaCuda/flacuda.cubin +++ b/CUETools.FlaCuda/flacuda.cubin @@ -383,16 +383,16 @@ code { code { name = cudaSumPartition lmem = 0 - smem = 2072 - reg = 6 + smem = 2200 + reg = 8 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 4 + bytes = 8 mem { - 0xffffffff + 0x00000020 0x0000001f } } bincode { @@ -404,29 +404,56 @@ code { 0xa0004c0d 0x04200780 0x30100811 0xc4100780 0x30010605 0xc4000780 0x60004e01 0x00210780 0x20018000 0x20008400 0x30020001 0xc4100780 + 0x2000c801 0x04200780 0xd00e000d 0x80c00780 + 0x10014003 0x00000780 0x1000f80d 0x0403c780 + 0xa0004205 0x04200782 0x1100ea00 0x20018410 + 0x00020405 0xc0000780 0x30000801 0xe40007e0 + 0x04000c01 0xe420c780 0x00020805 0xc0000780 + 0x307c01fd 0x640087d8 0xa002c003 0x00000000 + 0x1002b003 0x00002680 0x2101ea0d 0x00000003 + 0x100f8001 0x00000003 0x30030001 0xc4000780 + 0x40014e15 0x00200780 0xa0004c11 0x04200780 + 0x30100a15 0xc4100780 0x3003080d 0xc4000780 + 0x60004e01 0x00214780 0x2003800c 0x20028200 + 0x20000001 0x0400c780 0x30020001 0xc4100780 0x2000c801 0x04200780 0xd00e0001 0x80c00780 - 0x10014003 0x00000780 0x1000f801 0x0403c780 - 0x00020405 0xc0000782 0x04000c01 0xe4200780 - 0x861ffe03 0x00000000 0x2100ca05 0x046007d0 - 0x1002f003 0x00001980 0x300105fd 0xe40007d8 - 0xa002a003 0x00000000 0x1002a003 0x00001280 - 0x1002800d 0x00000003 0x1000ca01 0x0423c780 - 0x10048011 0x00000003 0x30000601 0xc4000780 - 0x30010811 0xc4000780 0x30010415 0xc4100780 - 0x20400011 0x04010780 0x3001060d 0xc4000780 - 0x20058810 0x20438000 0x00020805 0xc0000780 - 0x2000840c 0x1500ee00 0x00020609 0xc0000780 - 0x2400cc01 0x04200780 0x08000c01 0xe4200780 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x203f8205 0x0fffffff 0x308003fd 0x6c4147d8 - 0x10019003 0x00001280 0x30000003 0x00000100 - 0x2101ea0d 0x00000003 0x100f8001 0x00000003 - 0x30030005 0xc4000780 0x40034e11 0x00200780 - 0xa0004c01 0x04200780 0x30100815 0xc4100780 - 0x30030011 0xc4000780 0x1001800d 0x00000003 - 0x1000ca01 0x0423c780 0x60024e05 0x00214780 - 0x30000601 0xc4000780 0x20048204 0x20018004 - 0x2000840c 0x20018400 0x00020605 0xc0000780 + 0x1002c003 0x00000780 0x1000f801 0x0403c780 + 0x04000c01 0xe4200782 0x861ffe03 0x00000000 + 0x10018011 0x00000003 0x1000ca01 0x0423c780 + 0x213fea15 0x0fffffff 0x3000080d 0xc4000780 + 0x30050815 0xc4000780 0x20000411 0x0400c780 + 0x30800bfd 0x6c40c7e8 0x30010419 0xc4100780 + 0x00020805 0xc0000780 0x10044003 0x00002280 + 0x30020bfd 0x6400c7e8 0x00020c09 0xc0002500 + 0x1800ce01 0x0423e500 0x0002080d 0xc0002500 + 0x2800cc01 0x04202500 0x0c000c01 0xe4202500 + 0x30010a01 0xc4100780 0x20008c18 0x20048a10 + 0x861ffe03 0x00000000 0x30010a15 0xec100780 + 0x30800bfd 0x6c4107e8 0x10038003 0x00002280 + 0x308105fd 0x644107e8 0xa0055003 0x00000000 + 0x10055003 0x00002280 0x307c0bfd 0x6c00c7e8 + 0x10055003 0x00002280 0xa0054003 0x00000000 + 0x0002080d 0xc0000780 0x00020c09 0xc0000780 + 0x30010a1d 0xc4100780 0x20058810 0x1900ee00 + 0x30010a15 0xec100780 0x2800cc01 0x04200780 + 0x307c0bfd 0x6c0107e8 0x0c000c01 0xe4200780 + 0x20000c19 0x0401c780 0x1004a003 0x00002280 + 0xf0000001 0xe0000002 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0xa0066003 0x00000000 + 0x10066003 0x00000100 0x2101ea11 0x00000003 + 0x100f8001 0x00000003 0x30040001 0xc4000780 + 0x40014e19 0x00200780 0xa0004c15 0x04200780 + 0x30100c19 0xc4100780 0x30040a11 0xc4000780 + 0x60004e01 0x00218780 0x20048000 0x20008600 + 0x20000401 0x04000780 0x30020011 0xc4100780 + 0x1500ec00 0x2104e810 0xd00e0801 0xa0c00780 + 0xf0000001 0xe0000002 0x30000003 0x00001100 + 0x2101ea11 0x00000003 0x100f8001 0x00000003 + 0x30040001 0xc4000780 0x40014e19 0x00200780 + 0xa0004c15 0x04200780 0x30100c19 0xc4100780 + 0x30040a11 0xc4000780 0x60004e01 0x00218780 + 0x20048000 0x20008600 0x2003820c 0x20008200 + 0x20038404 0x20008400 0x00020205 0xc0000780 0x30020005 0xc4100780 0x1500ec00 0x2101e804 0xd00e0201 0xa0c00781 } @@ -709,10 +736,10 @@ code { } } code { - name = cudaCalcPartition1 + name = cudaCalcPartition16 lmem = 0 - smem = 3308 - reg = 11 + smem = 1388 + reg = 9 bar = 1 const { segname = const @@ -720,79 +747,89 @@ code { offset = 0 bytes = 24 mem { - 0x000003ff 0x0000002f 0x000fffff 0x00000001 - 0x0000000f 0x0000000e + 0x000003ff 0x0000002f 0x0000001f 0x000fffff + 0x0000000e 0x0000000f } } bincode { 0x10000005 0x0403c780 0xd0800601 0x00400780 - 0xa0000001 0x04000780 0xa0000415 0x04000780 - 0x30040005 0xc4100780 0x20000a21 0x04004780 - 0x308111fd 0x644107c8 0xa0012003 0x00000000 - 0x30021019 0xc4100780 0x10012003 0x00000280 - 0xa0004e05 0x04200780 0x30070209 0xc4100780 - 0x30060205 0xc4100780 0x20018404 0x2101ee04 + 0xa0000001 0x04000780 0xa0000409 0x04000780 + 0x30040005 0xc4100780 0x2000040d 0x04004780 + 0x308107fd 0x644107c8 0xa0012003 0x00000000 + 0x30020619 0xc4100780 0x10012003 0x00000280 + 0xa0004e05 0x04200780 0x30070211 0xc4100780 + 0x30060205 0xc4100780 0x20018804 0x2101ee04 0x20000c05 0x04004780 0xd00e0205 0x80c00780 - 0x00000c05 0xc0000780 0x04061601 0xe4204780 + 0x00000c05 0xc0000780 0x04025601 0xe4204780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xa0004c0d 0x04200780 0x1100f404 0x1100f208 - 0x4006061c 0x40050c24 0x10018029 0x00000003 - 0x1000d011 0x0423c780 0x30100e1d 0xc4100780 - 0x30101225 0xc4100780 0x30041411 0xc4000780 - 0x6006041d 0x0001c780 0x60040c0d 0x00024780 - 0x30048e10 0x1100f208 0x40060625 0x00000780 - 0x3004d411 0xa4200780 0x60070425 0x00024780 - 0x40051029 0x00000780 0x30101225 0xc4100780 - 0x60041229 0x00028780 0x60060405 0x00024780 - 0xd0185805 0x20000780 0x3010140d 0xc4100780 - 0x20000205 0x04020780 0x60041009 0x0000c780 - 0x3401c1fd 0x6c20c7c8 0x300211fd 0x6c0042c8 - 0xa0035003 0x00000000 0x10034003 0x00000100 - 0xd018a805 0x20000780 0x2400c005 0x04204780 - 0x30020205 0xc4100780 0x2000ca05 0x04204780 - 0xd00e0205 0x80c00780 0x10035003 0x00000780 - 0x1000f805 0x0403c780 0x301f0209 0xec100782 - 0x30010205 0xc4100780 0xd0010405 0x04008780 - 0x00000c05 0xc0000780 0x30820205 0xac400780 - 0x04001601 0xe4204780 0x861ffe03 0x00000000 - 0x307c0a05 0x64008780 0x30000809 0x64010780 - 0xd0830205 0x04400780 0xd0830409 0x04400780 - 0xd002020d 0x04000780 0x307cd3fd 0x6c2107c8 - 0x1000f821 0x0403c780 0x00000c05 0xc0000780 - 0x1000f809 0x0403c780 0x04021601 0xe43f0780 - 0x1005b003 0x00000100 0x1000d205 0x0423c780 - 0x40010425 0x00000780 0x60000625 0x00024780 - 0x30101225 0xc4100780 0x60000429 0x00024780 - 0x20001405 0x04014780 0x200b8225 0x00000003 - 0x2000d229 0x04228780 0x00021205 0xc0000780 - 0xa005a003 0x00000000 0x20000a25 0x04028780 - 0x3408c029 0xec200780 0x20108205 0x00000003 - 0x20000409 0x04028780 0x00000c09 0xc0000780 - 0x300903fd 0x6c0047d8 0xd4008005 0x20000780 - 0x08021601 0xe4208780 0x10052003 0x00001280 - 0xf0000001 0xe0000002 0x00000c05 0xc0000780 - 0xd4086009 0x20000780 0x2800ce05 0x04208780 - 0x04021601 0xe4204780 0x2800c605 0x04204780 - 0x04021601 0xe4204780 0x2800c205 0x04204780 - 0x04021601 0xe4204780 0x2800c005 0x04204780 - 0x307c07fd 0x6c0087d8 0x04021601 0xe4204780 - 0x30041009 0xc4101500 0x20000009 0x04009500 - 0x00020405 0xc0001500 0x04041601 0xe4205500 - 0x20019021 0x00000003 0x308411fd 0x6c4147d8 - 0x10043003 0x00001280 0x861ffe03 0x00000000 - 0x300509fd 0x640107c8 0x308501fd 0x6440c2c8 - 0x30000003 0x00000100 0xd0185805 0x20000780 - 0x1000d20d 0x0423c780 0x20018009 0x00000003 - 0x1000d205 0x0423c780 0x3503e00c 0x40030810 - 0x610f2e01 0x00000003 0x60020a29 0x00010780 - 0x40070825 0x00000780 0x2101f021 0x00000003 - 0x20000e11 0x040147c0 0x30101429 0xc4100780 - 0x60060a1d 0x00024780 0x30080015 0xc4000780 - 0x60020801 0x00028780 0x30100e1d 0xc4100780 - 0x20000805 0x04014780 0x00000c05 0xc0000780 - 0x60060801 0x0001c100 0x30020205 0xc4100780 - 0xd4105805 0x20000780 0x2101e804 0x2500e000 - 0xd00e0201 0xa0c00781 + 0xa0004c11 0x04200780 0x30080805 0xc4100780 + 0x308207fd 0x6c4107c8 0xa002c003 0x00000000 + 0x20000215 0x0400c780 0x1002c003 0x00000280 + 0xd0095805 0x20000780 0x2400c005 0x0420c780 + 0x30010a05 0xac000780 0x308203fd 0x6c40c7c8 + 0xa002a003 0x00000000 0x10029003 0x00000280 + 0xd0096005 0x20000780 0x2400c005 0x04214780 + 0x30020205 0xc4100780 0x2000cc05 0x04204780 + 0x20008205 0x0ffffffb 0xd00e021d 0x80c00780 + 0x1400d405 0x0423c780 0x30010e05 0xec000780 + 0x1002a003 0x00000780 0x1000f805 0x0403c780 + 0x00000c05 0xc0000782 0x04001601 0xe4204780 + 0xd0096005 0x20000782 0x2400c005 0x04214780 + 0x30020205 0xc4100780 0x2000cc05 0x04204780 + 0xd00e021d 0x80c00780 0x1400d405 0x0423c780 + 0x30010e05 0xec000780 0x00000c05 0xc0000780 + 0x04005601 0xe4204780 0x861ffe03 0x00000000 + 0xd0095805 0x20000780 0x3500e01d 0x00000003 + 0x307c0ffd 0x6c0187c8 0x1000f821 0x0403c780 + 0x1400c005 0x0423c780 0x1004d003 0x00000280 + 0x20000e05 0x0400c780 0x200b821d 0x00000003 + 0x102c8005 0x00000003 0x00020e09 0xc0000780 + 0x00000205 0xc0000780 0x1000f81d 0x0403c780 + 0xd4098011 0x20000780 0xd801000d 0x20000780 + 0x20018e1d 0x00000003 0x1000c005 0x0423c784 + 0xd0095811 0x20000780 0x6c01c021 0x80220780 + 0x3007c1fd 0x6c2147cc 0xd8000809 0x20000780 + 0xd4000805 0x20000780 0x1000c005 0x0423c784 + 0x10042003 0x00000280 0xd0096805 0x20000780 + 0x1400c01d 0x0423c780 0x00000c05 0xc0000780 + 0x30071019 0xec000780 0xd4015805 0x20000780 + 0x300503fd 0x6c0107c8 0xa005d003 0x00000000 + 0x2440c005 0x04218780 0x1005c003 0x00000280 + 0xd009a805 0x20000780 0x2400c015 0x04214780 + 0x30020a15 0xc4100780 0x2000ca15 0x04214780 + 0xd00e0a05 0xa0c00780 0x1005d003 0x00000780 + 0x1000f805 0x0403c780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0x301f0215 0xec100780 + 0x30010219 0xc4100780 0x20000605 0x04000780 + 0xd0060a0d 0x04008780 0x00020205 0xc0000780 + 0x30830605 0xac400780 0x04001601 0xe4204780 + 0x861ffe03 0x00000000 0x40518405 0x00000003 + 0x00020205 0xc0000780 0x3400d605 0xec200780 + 0x3400d80d 0xec200780 0x3400da15 0xec200780 + 0x3400dc19 0xec200780 0x20038204 0x20068a0c + 0x3400de15 0xec200780 0x3400e019 0xec200780 + 0x20038204 0x20068a0c 0x3400e215 0xec200780 + 0x3400e419 0xec200780 0x20038204 0x20068a0c + 0x3400e615 0xec200780 0x3400e819 0xec200780 + 0x20038204 0x20068a0c 0x3400ea15 0xec200780 + 0x3400ec19 0xec200780 0x20038204 0x20068a0c + 0x3400ee15 0xec200780 0x3400f019 0xec200780 + 0x20038204 0x20068a0c 0x3400f215 0xec200780 + 0x3400f419 0xec200780 0x20038204 0x20068a0c + 0x308401fd 0x644107c8 0x20000215 0x0400c780 + 0x30000003 0x00000280 0x200009fd 0x040087c8 + 0xa0092003 0x00000000 0x1008f003 0x00000280 + 0xd0095805 0x20000780 0x20018005 0x00000003 + 0x3510e00d 0x00000003 0x40070419 0x00000780 + 0x60060619 0x00018780 0x30100c19 0xc4100780 + 0x60060405 0x00018780 0x20000205 0x04014780 + 0x10092003 0x00000780 0x30040005 0xc4100780 + 0x20000a05 0x04004780 0x20108205 0x00000003 + 0xf0000001 0xe0000002 0x610f2e01 0x00000003 + 0x2101f00d 0x00000003 0x3003000d 0xc4000780 + 0x30040801 0xc4100780 0x20038000 0x20008400 + 0x30020001 0xc4100780 0x2000c801 0x04200780 + 0xd00e0005 0xa0c00781 } } code { @@ -1059,93 +1096,89 @@ code { code { name = cudaFindRiceParameter lmem = 0 - smem = 2332 - reg = 10 + smem = 2076 + reg = 9 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 20 + bytes = 24 mem { - 0x000003ff 0x00000020 0x0000001f 0x00000001 - 0x0000000e + 0x00000020 0x000003ff 0x00000001 0x0000000e + 0x00000080 0x00000040 } } bincode { - 0xd0800205 0x00400780 0xa0000211 0x04000780 0x10028009 0x00000003 0x1000cc05 0x0423c780 - 0xa0000015 0x04000780 0x30030801 0xc4100780 - 0x30010405 0xc4000780 0x20000a01 0x04000780 - 0x30810209 0xac400780 0xd0820005 0x04400780 - 0x3001040d 0x6c0107d0 0xa00007fd 0x0c0147c8 - 0xa001e003 0x00000000 0x30050019 0xec100780 - 0x1001d003 0x00001100 0x2101ec1d 0x00000003 - 0x100f800d 0x00000003 0x3007060d 0xc4000780 - 0x40074e21 0x00200780 0x30101021 0xc4100780 - 0x60064e21 0x00220780 0x30070c0d 0xc4000780 - 0x2000101d 0x0400c780 0x60824c0d 0x00604780 - 0x2000060d 0x0401c780 0x3002060d 0xc4100780 - 0x2000ca0d 0x0420c780 0xd00e060d 0x80c00780 - 0x1001e003 0x00000780 0x103f800d 0x000fffff - 0x1000f81d 0x0403c782 0x20088c21 0x00000003 - 0x30841021 0x6c40c780 0x1000061d 0x2440c280 - 0xa0001021 0x2c014780 0x00020005 0xc0000780 - 0xd0080ffd 0x040007c8 0x04020e01 0xe420c780 - 0xa0039003 0x00000000 0x10038003 0x00000100 - 0x2101ec1d 0x00000003 0x100f800d 0x00000003 - 0x3007060d 0xc4000780 0x40074e21 0x00200780 - 0x30101021 0xc4100780 0x60064e0d 0x00220780 - 0x10088021 0x00000003 0x30070c25 0xc4000780 - 0x30071019 0xc4000780 0x2009860c 0x20038c0c - 0x61202c0d 0x00000003 0x20000205 0x0400c780 - 0x30020205 0xc4100780 0x2000ca05 0x04204780 - 0xd00e0205 0x80c00780 0x10039003 0x00000780 - 0x103f8005 0x000fffff 0x04000e01 0xe4204782 - 0x861ffe03 0x00000000 0x30050a05 0xc4100780 - 0x20000205 0x04010780 0x00020209 0xc0000780 - 0xd808380d 0x20000780 0x1900ee18 0x1d00e004 - 0x861ffe03 0x00000000 0x300603fd 0x6c0107c8 - 0x20088a0d 0x00000003 0x10000a0d 0x0403c500 - 0x30060219 0xac000780 0x04020e01 0xe420c780 - 0x30020bfd 0xe41007c8 0xa0055003 0x00000000 - 0x04000e01 0xe4218780 0x10055003 0x00000280 - 0x2004800d 0x00000003 0x00020609 0xc0000780 - 0x10000005 0x0403c780 0x3806cffd 0x6c2047c8 - 0x10000605 0x0403c280 0x0002020d 0xc0000780 - 0xdc08380d 0x20000780 0x3806ce19 0xac200780 - 0x1c00c005 0x0423c780 0x04020e01 0xe4204780 - 0x04000e01 0xe4218780 0x30010bfd 0xe41007ca - 0xa0063003 0x00000000 0x10063003 0x00000280 - 0x2002800d 0x00000003 0x00020609 0xc0000780 - 0x10000005 0x0403c780 0x3806cffd 0x6c2047c8 - 0x10000605 0x0403c280 0x0002020d 0xc0000780 - 0xdc08380d 0x20000780 0x3806ce19 0xac200780 - 0x1c00c005 0x0423c780 0x04020e01 0xe4204780 - 0x04000e01 0xe4218780 0x307c0bfd 0x640087ca - 0x300405fd 0x640102c8 0xa0070003 0x00000000 - 0x10070003 0x00000100 0x3406d005 0x6c204780 - 0x30000205 0x04000780 0x00020209 0xc0000780 - 0xd808380d 0x20000780 0x00020809 0xc0000780 - 0x3406d00d 0xac200780 0x1c00c005 0x0423c780 - 0x08044e01 0xe4204780 0x08040e01 0xe420c780 + 0x30010409 0xc4000780 0x10000005 0x0403c780 + 0x30800409 0xac400780 0xa0000401 0x04000780 + 0x3000040d 0x640107d0 0xd0820609 0x00400780 + 0xa00007fd 0x0c0147c8 0xa001b003 0x00000000 + 0xa0000405 0x04000780 0x1001a003 0x00001100 + 0x2101ec11 0x00000003 0x100f800d 0x00000003 + 0x3004060d 0xc4000780 0x40074e15 0x00200780 + 0x30100a15 0xc4100780 0x60064e15 0x00214780 + 0x3004020d 0xc4000780 0x20000a11 0x0400c780 + 0x60804c0d 0x00600780 0x2000060d 0x04010780 + 0x3002060d 0xc4100780 0x2000ca0d 0x0420c780 + 0xd00e0611 0x80c00780 0x1001b003 0x00000780 + 0x103f8011 0x000fffff 0x1000f80d 0x0403c782 + 0x20088215 0x00000003 0x30830a19 0x6440c780 + 0x1000040d 0x2440c280 0xa0000c19 0x2c014780 + 0xd00607fd 0x040007c8 0xa0035003 0x00000000 + 0x10034003 0x00000100 0x2101ec19 0x00000003 + 0x100f800d 0x00000003 0x3006060d 0xc4000780 + 0x40074e1d 0x00200780 0x30100e1d 0xc4100780 + 0x60064e21 0x0021c780 0x3006021d 0xc4000780 + 0x1008800d 0x00000003 0x2000101d 0x0401c780 + 0x3006060d 0xc4000780 0x2000060d 0x0401c780 + 0x61202c0d 0x00000003 0x2000000d 0x0400c780 + 0x3002060d 0xc4100780 0x2000ca0d 0x0420c780 + 0xd00e0619 0x80c00780 0x10035003 0x00000780 + 0x103f8019 0x000fffff 0x3005020d 0xc4100782 + 0x20000601 0x04000780 0x300609fd 0x6c0107c8 + 0x1000020d 0x0403c780 0x10000a0d 0x0403c280 + 0x00020005 0xc0000780 0x30060805 0xac000780 + 0x04020e01 0xe420c780 0x04000e01 0xe4204780 + 0x861ffe03 0x00000000 0x308401fd 0x6c4187c8 + 0xa004e003 0x00000000 0x1004e003 0x00000280 + 0x20008011 0x0000000b 0x00020809 0xc0000780 + 0x1000000d 0x0403c780 0x3801cffd 0x6c2047c8 + 0x1000080d 0x0403c280 0x00020605 0xc0000780 + 0xd408380d 0x20000780 0x00020005 0xc0000780 + 0x3801ce05 0xac200780 0x1c00c00d 0x0423c780 + 0x04020e01 0xe420c780 0x04000e01 0xe4204780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x30000405 0x6c0107d0 0xa00003fd 0x0c0147c8 - 0xa0081003 0x00000000 0x10081003 0x00001100 - 0x40054c11 0x00200780 0xa0004e05 0x04200780 - 0x2102ec0d 0x00000003 0x30100811 0xc4100780 - 0x30030205 0xc4000780 0x60044c0d 0x00210780 - 0x20018604 0x20018004 0xd4113809 0x20000780 - 0x3002020d 0xc4100780 0x1900e004 0x2103e80c - 0xd00e0605 0xa0c00780 0xf0000001 0xe0000002 - 0x30000003 0x00000100 0x2101ec0d 0x00000003 - 0x40054c19 0x00200780 0x10018005 0x00000003 - 0xa0004e11 0x04200780 0x2102ec15 0x00000003 - 0x30100c19 0xc4100780 0x30030205 0xc4000780 - 0x3005080d 0xc4000780 0x60044c09 0x00218780 - 0x20038204 0x20008400 0x20000001 0x04004780 - 0xd4103805 0x20000780 0x30020005 0xc4100780 - 0x1500e000 0x2101e804 0xd00e0201 0xa0c00781 + 0x308501fd 0x6c4187c8 0xa005f003 0x00000000 + 0x1005f003 0x00000280 0x20008011 0x00000007 + 0x00020809 0xc0000780 0x1000000d 0x0403c780 + 0x3801cffd 0x6c2047c8 0x1000080d 0x0403c280 + 0x00020605 0xc0000780 0xd408380d 0x20000780 + 0x00020005 0xc0000780 0x3801ce05 0xac200780 + 0x1c00c00d 0x0423c780 0x04020e01 0xe420c780 + 0x04000e01 0xe4204780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0x308001fd 0x6c4187c8 + 0xa0070003 0x00000000 0x10070003 0x00000280 + 0x20208011 0x00000003 0x00020809 0xc0000780 + 0x1000000d 0x0403c780 0x3801cffd 0x6c2047c8 + 0x1000080d 0x0403c280 0x00020605 0xc0000780 + 0xd408380d 0x20000780 0x00020005 0xc0000780 + 0x3801ce0d 0xac200780 0x1c00c005 0x0423c780 + 0x04020e01 0xe4204780 0x04000e01 0xe420c780 + 0xf0000001 0xe0000002 0x861ffe03 0x00000000 + 0x300005fd 0x6c00c7c8 0x30000003 0x00000280 + 0xa0004c0d 0x04200780 0x40060a05 0x00000780 + 0x2102ec19 0x00000003 0xa0004e15 0x04200780 + 0x30100211 0xc4100780 0x30060a05 0xc4000780 + 0x60060809 0x00010780 0x2101ec15 0x00000003 + 0x10018011 0x00000003 0x2000040d 0x04004780 + 0x30050811 0xc4000780 0x00020005 0xc0000780 + 0x2003800c 0x20028000 0x20000209 0x04010780 + 0xd4083809 0x20000780 0x30020605 0xc4100780 + 0x20028008 0x1900e000 0x2000c805 0x04204780 + 0x30020409 0xc4100780 0xd00e0201 0xa0c00780 + 0x1500ee00 0x2102e804 0xd00e0201 0xa0c00781 } } code { @@ -1418,7 +1451,7 @@ code { name = cudaCalcPartition lmem = 0 smem = 1388 - reg = 11 + reg = 14 bar = 1 const { segname = const @@ -1431,97 +1464,110 @@ code { } } bincode { - 0xd0800205 0x00400780 0xa0000209 0x04000780 - 0xa0000019 0x04000780 0x30040401 0xc4100780 - 0x20000c11 0x04000780 0x308109fd 0x644107c8 - 0xa0011003 0x00000000 0x3002081d 0xc4100780 + 0xd0800205 0x00400780 0xa0000215 0x04000780 + 0xa0000019 0x04000780 0x30040a01 0xc4100780 + 0x20000c05 0x04000780 0x308103fd 0x644107c8 + 0xa0011003 0x00000000 0x3002021d 0xc4100780 0x10011003 0x00000280 0xa0004e01 0x04200780 - 0x30070005 0xc4100780 0x30060001 0xc4100780 - 0x20008200 0x2100ee00 0x20000e01 0x04000780 + 0x30070009 0xc4100780 0x30060001 0xc4100780 + 0x20008400 0x2100ee00 0x20000e01 0x04000780 0xd00e0001 0x80c00780 0x00000e05 0xc0000780 0x04025601 0xe4200780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0xa0004c05 0x04200780 - 0x1000d201 0x0423c780 0x4002020d 0x00000780 - 0x3010060d 0xc4100780 0x6002000d 0x0000c780 - 0x1000d401 0x0423c780 0x40010c15 0x00000780 - 0x60000e15 0x00014780 0x30100a15 0xc4100780 - 0x60000c01 0x00014780 0x308209fd 0x6c4107c8 - 0xa0033003 0x00000000 0x20000015 0x04010780 + 0x861ffe03 0x00000000 0xa0004c09 0x04200780 + 0x1000d201 0x0423c780 0x4004020d 0x00000780 + 0x3010060d 0xc4100780 0x6004000d 0x0000c780 + 0x1000d401 0x0423c780 0x40010c11 0x00000780 + 0x60000e11 0x00010780 0x30100811 0xc4100780 + 0x60000c01 0x00010780 0x308203fd 0x6c4107c8 + 0xa0033003 0x00000000 0x20000025 0x04004780 0x10033003 0x00000280 0xd0095805 0x20000780 - 0x2400c001 0x04210780 0x30000a01 0xac000780 + 0x2400c001 0x04204780 0x30001201 0xac000780 0x308201fd 0x6c40c7c8 0xa0031003 0x00000000 0x10030003 0x00000280 0xd0096005 0x20000780 - 0x2400c001 0x04214780 0x30020001 0xc4100780 + 0x2400c001 0x04224780 0x30020001 0xc4100780 0x2000cc01 0x04200780 0x20008001 0x0ffffffb 0xd00e000d 0x80c00780 0x1400d401 0x0423c780 0x30000601 0xec000780 0x10031003 0x00000780 0x1000f801 0x0403c780 0x00000e05 0xc0000782 0x04001601 0xe4200780 0x1000d401 0x0423c782 - 0x4003000d 0x00000780 0x60020225 0x0000c780 + 0x4005000d 0x00000780 0x60040211 0x0000c780 0x10018021 0x00000003 0x1000d00d 0x0423c780 - 0x30101225 0xc4100780 0x3003100d 0xc4000780 - 0x60020021 0x00024780 0x3003900c 0x1100f200 - 0x3003d40d 0xa4200780 0x40010c25 0x00000780 - 0x60000e25 0x00024780 0x30101225 0xc4100780 - 0x60000c01 0x00024780 0x30040001 0x6c0107d0 + 0x30100811 0xc4100780 0x3003100d 0xc4000780 + 0x60040021 0x00010780 0x3003900c 0x1100f200 + 0x3003d411 0xa4200780 0x4001100d 0x00000780 + 0x6000120d 0x0000c780 0x3010060d 0xc4100780 + 0x60001001 0x0000c780 0x30010001 0x6c0107d0 0xa00001fd 0x0c0147c8 0xa004e003 0x00000000 0x1004d003 0x00001100 0xd0096005 0x20000780 - 0x2400c001 0x04214780 0x30020001 0xc4100780 - 0x2000cc01 0x04200780 0xd00e0025 0x80c00780 - 0x1400d401 0x0423c780 0x30001201 0xec000780 + 0x2400c001 0x04224780 0x30020001 0xc4100780 + 0x2000cc01 0x04200780 0xd00e000d 0x80c00780 + 0x1400d401 0x0423c780 0x30000601 0xec000780 0x1004e003 0x00000780 0x1000f801 0x0403c780 0x00000e05 0xc0000782 0x04005601 0xe4200780 0x861ffe03 0x00000000 0xd0095805 0x20000780 0x3500e029 0x00000003 0x307c15fd 0x6c0187d8 - 0x1000f825 0x0403c780 0x1400c001 0x0423c780 - 0x10068003 0x00001280 0x20001401 0x04010780 - 0x200b8001 0x00000003 0x102c8011 0x00000003 - 0x00020009 0xc0000780 0x00000805 0xc0000780 - 0x1000f811 0x0403c780 0xd4098011 0x20000780 - 0xd801000d 0x20000780 0x20018811 0x00000003 + 0x1000f80d 0x0403c780 0x1400c001 0x0423c780 + 0x10068003 0x00001280 0x20001401 0x04004780 + 0x200b8029 0x00000003 0x102c8001 0x00000003 + 0x00021409 0xc0000780 0x00000005 0xc0000780 + 0x1000f829 0x0403c780 0xd4098011 0x20000780 + 0xd801000d 0x20000780 0x20019429 0x00000003 0x1000c001 0x0423c784 0xd0095811 0x20000780 - 0x6c00c025 0x80224780 0x3004c1fd 0x6c2147dc + 0x6c00c00d 0x8020c780 0x300ac1fd 0x6c2147dc 0xd8000809 0x20000780 0xd4000805 0x20000780 0x1000c001 0x0423c784 0x1005d003 0x00001280 - 0xd0096809 0x20000780 0x30050029 0x6c00c780 - 0x1000f811 0x0403c780 0x00000e05 0xc0000780 - 0x1800c001 0x0423c780 0xa0001429 0x2c014780 - 0x10000611 0x2440c280 0x30001201 0xec000780 - 0xd4015805 0x20000780 0xd00a09fd 0x040007c8 - 0x2440c025 0x04200780 0xd009a805 0x20000780 - 0x2400c001 0x04214680 0x30020001 0xc4100680 - 0x2000ca01 0x04200680 0xd00e0025 0xa0c00680 - 0x1000f825 0x0403c100 0x861ffe03 0x00000000 - 0x301f1211 0xec100780 0x30011201 0xc4100780 - 0xd0000801 0x04008780 0x00000e05 0xc0000780 - 0x30840001 0xac400780 0x04001601 0xe4200780 - 0x861ffe03 0x00000000 0xd0095805 0x20000780 - 0x1000d215 0x0423c780 0x20018c11 0x00000003 - 0x1000d201 0x0423c780 0x3505e014 0x40011024 - 0x60001225 0x00024780 0x400b1029 0x00000780 - 0x30101225 0xc4100780 0x600a1229 0x00028780 - 0x200003fd 0x040087c8 0x60001025 0x00024780 - 0x30101401 0xc4100780 0x307cd3fd 0x6c20c7d8 - 0x600a1025 0x00000100 0x1009f003 0x00001280 - 0x1000d201 0x0423c780 0x40050005 0x00000780 - 0x60040205 0x00004780 0x30100205 0xc4100780 - 0x60040011 0x00004780 0x200b8801 0x00000003 - 0x00020005 0xc0000780 0xa009e003 0x00000000 - 0x2000d205 0x04210780 0x20018811 0x00000003 - 0x3606c201 0xec200780 0x300109fd 0x6c0147c8 - 0x20001225 0x04000780 0x10099003 0x00000280 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x00000e05 0xc0000780 0x04001601 0xe4224780 - 0x861ffe03 0x00000000 0x300607fd 0x640107c8 - 0x308505fd 0x6440c2c8 0x30000003 0x00000100 - 0x2101f00d 0x00000003 0x100f8001 0x00000003 - 0x30030005 0xc4000780 0x40034e01 0x00200780 - 0x30100001 0xc4100780 0x60024e05 0x00200780 - 0x3003040d 0xc4000780 0x20001011 0x04018780 - 0x30040c01 0xc4100780 0x20038204 0x20008400 - 0x20000805 0x04004780 0x00020005 0xc0000780 - 0x30020205 0xc4100780 0x1500f600 0x2101e804 - 0xd00e0201 0xa0c00781 + 0xd0096809 0x20000780 0x3009002d 0x6c00c780 + 0x1000f829 0x0403c780 0x00000e05 0xc0000780 + 0x1800c001 0x0423c780 0xa000161d 0x2c014780 + 0x10000629 0x2440c280 0x30000601 0xec000780 + 0xd4015805 0x20000780 0xd00715fd 0x040007c8 + 0x2440c001 0x04200780 0xd009a805 0x20000780 + 0x2400c00d 0x04224680 0x3002060d 0xc4100680 + 0x2000ca0d 0x0420c680 0xd00e0601 0xa0c00680 + 0x1000f801 0x0403c100 0x861ffe03 0x00000000 + 0x301f001d 0xec100780 0x30010025 0xc4100780 + 0x1100f20c 0x10008200 0xd0090e1d 0x04008780 + 0x200af003 0x00000780 0x20000201 0x04000780 + 0x00020005 0xc0000780 0x30840e01 0xac400780 + 0x04001601 0xe4200780 0x861ffe03 0x00000000 + 0xd0095805 0x20000780 0x1000d20d 0x0423c780 + 0x20018a05 0x00000003 0x1000d201 0x0423c780 + 0x3503e00c 0x4001041c 0x60000625 0x0001c780 + 0x4007041d 0x00000780 0x30101225 0xc4100780 + 0x6006061d 0x0001c780 0x200005fd 0x040187c8 + 0x60000401 0x00024780 0x30100e09 0xc4100780 + 0x307cd3fd 0x6c20c7d8 0x60060401 0x00008100 + 0x1009f003 0x00001280 0x2101f205 0x00000003 + 0x40010c05 0x00018780 0x200b8209 0x00000003 + 0x00020405 0xc0000780 0xa009e003 0x00000000 + 0x2000d209 0x04204780 0x20018205 0x00000003 + 0x3605c20d 0xec200780 0x300203fd 0x6c0147c8 + 0x20000001 0x0400c780 0x10099003 0x00000280 + 0xf0000001 0xe0000002 0x300609fd 0x640107c8 + 0x30850bfd 0x6440c2c8 0x30000003 0x00000100 + 0x2101f009 0x00000003 0x100f8005 0x00000003 + 0x30020205 0xc4000780 0x40034e0d 0x00200780 + 0x3010060d 0xc4100780 0x30020a09 0xc4000780 + 0x60024e0d 0x0020c780 0x20069004 0x20028608 + 0x20000205 0x04008780 0x30020205 0xc4100780 + 0x2000c805 0x04204780 0xd00e0201 0xa0c00780 + 0x30000003 0x00000780 0xa0000625 0x04114780 + 0xa0001229 0x44004780 0xa000002d 0x04114780 + 0x90001431 0x00000780 0xa0001629 0x44064780 + 0x203e9831 0x0fffffff 0xc00c1429 0x0000c7c0 + 0xa0001429 0x84064780 0x40152435 0x00000780 + 0x60142635 0x00034780 0x30101a35 0xc4100780 + 0x60142435 0x00034780 0x20401635 0x04034780 + 0xa0001a35 0x44064780 0xc00c1a31 0x0000c7c0 + 0xa0001831 0x84064780 0x20001429 0x04030780 + 0x40122a31 0x00000780 0x60132831 0x00030780 + 0x30101831 0xc4100780 0x60122831 0x00030780 + 0x3000182d 0x0402c780 0x300b1225 0x6400c780 + 0xd0000601 0x04008780 0x301f0001 0xe4100780 + 0x30001229 0x04028780 0xa0000025 0x2c014780 + 0xd00a1225 0x04008780 0x307c07fd 0x6c0147c8 + 0x20000001 0x04024780 0xd0030001 0x0402c500 + 0x30000003 0x00000780 0xf0000001 0xe0000001 } } code {