diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs index 35db648..ca7f7a5 100644 --- a/CUETools.FlaCuda/FlaCudaWriter.cs +++ b/CUETools.FlaCuda/FlaCudaWriter.cs @@ -919,7 +919,7 @@ namespace CUETools.Codecs.FlaCuda autocorTasks[nAutocorTasks].windowOffs = iWindow * 2 * FlaCudaWriter.MAX_BLOCKSIZE; nAutocorTasks++; // LPC tasks - for (int order = 1; order <= ((max_order + 7) & ~7); order++) + for (int order = 1; order <= max_order; order++) { residualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0; residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE; @@ -929,7 +929,7 @@ namespace CUETools.Codecs.FlaCuda // Fixed prediction for (int ch = 0; ch < channelsCount; ch++) { - for (int order = 1; order <= 8; order++) + for (int order = 1; order <= max_order; order++) { residualTasks[nResidualTasks].residualOrder = order <= 4 ? order : 0; residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE; @@ -944,19 +944,19 @@ namespace CUETools.Codecs.FlaCuda residualTasks[nResidualTasks].coefs[0] = 1; break; case 2: - residualTasks[nResidualTasks].coefs[0] = 2; - residualTasks[nResidualTasks].coefs[1] = -1; + residualTasks[nResidualTasks].coefs[1] = 2; + residualTasks[nResidualTasks].coefs[0] = -1; break; case 3: - residualTasks[nResidualTasks].coefs[0] = 3; + residualTasks[nResidualTasks].coefs[2] = 3; residualTasks[nResidualTasks].coefs[1] = -3; - residualTasks[nResidualTasks].coefs[2] = 1; + residualTasks[nResidualTasks].coefs[0] = 1; break; case 4: - residualTasks[nResidualTasks].coefs[0] = 4; - residualTasks[nResidualTasks].coefs[1] = -6; - residualTasks[nResidualTasks].coefs[2] = 4; - residualTasks[nResidualTasks].coefs[3] = -1; + residualTasks[nResidualTasks].coefs[3] = 4; + residualTasks[nResidualTasks].coefs[2] = -6; + residualTasks[nResidualTasks].coefs[1] = 4; + residualTasks[nResidualTasks].coefs[0] = -1; break; } nResidualTasks++; @@ -1037,7 +1037,7 @@ namespace CUETools.Codecs.FlaCuda { for (int order = 1; order <= max_order && order < frame.blocksize; order++) { - int index = (order - 1) + ((max_order + 7) & ~7) * (iWindow + _windowcount * ch); + int index = (order - 1) + max_order * (iWindow + _windowcount * ch); int cbits = residualTasks[index].cbits; int nbits = order * (int)frame.subframes[ch].obits + 4 + 5 + order * cbits + 6 + residualTasks[index].size; if (residualTasks[index].residualOrder != order) @@ -1051,7 +1051,7 @@ namespace CUETools.Codecs.FlaCuda frame.subframes[ch].best.cbits = cbits; frame.subframes[ch].best.shift = residualTasks[index].shift; for (int i = 0; i < order; i++) - frame.subframes[ch].best.coefs[i] = residualTasks[index].coefs[i];//order - 1 - i]; + frame.subframes[ch].best.coefs[i] = residualTasks[index].coefs[order - 1 - i]; } } } @@ -1060,9 +1060,9 @@ namespace CUETools.Codecs.FlaCuda // FIXED for (int ch = 0; ch < channelsCount; ch++) { - for (int order = 1; order <= 5 && order < frame.blocksize; order++) + for (int order = 1; order <= 5 && order <= max_order && order < frame.blocksize; order++) { - int index = (order - 1) + 8 * ch + ((max_order + 7) & ~7) * _windowcount * channelsCount; + int index = (order - 1) + max_order * (ch + _windowcount * channelsCount); int forder = order == 5 ? 0 : order; int nbits = forder * (int)frame.subframes[ch].obits + 6 + residualTasks[index].size; if (residualTasks[index].residualOrder != (order == 5 ? 1 : order)) @@ -1086,7 +1086,22 @@ namespace CUETools.Codecs.FlaCuda } uint cbits = get_precision(frame.blocksize) + 1; - int partSize = 256 - 32; + int threads_y; + if (max_order >= 4 && max_order <= 8) + threads_y = max_order; + else if ((max_order % 8) == 0) + threads_y = 8; + else if ((max_order % 7) == 0) + threads_y = 7; + else if ((max_order % 6) == 0) + threads_y = 6; + else if ((max_order % 5) == 0) + threads_y = 5; + else if ((max_order % 4) == 0) + threads_y = 4; + else + throw new Exception("invalid LPC order"); + int partSize = 32 * (threads_y - 1); partCount = (frame.blocksize + partSize - 1) / partSize; @@ -1100,7 +1115,7 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 4, (uint)frame.blocksize); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 5, (uint)partSize); cuda.SetParameterSize(cudaEstimateResidual, sizeof(uint) * 6); - cuda.SetFunctionBlockShape(cudaEstimateResidual, 32, 8, 1); + cuda.SetFunctionBlockShape(cudaEstimateResidual, 32, threads_y, 1); //cuda.SetParameter(cudaSumResidualChunks, 0, (uint)cudaResidualSums.Pointer); //cuda.SetParameter(cudaSumResidualChunks, sizeof(uint), (uint)cudaResidualTasks.Pointer); @@ -1118,7 +1133,7 @@ namespace CUETools.Codecs.FlaCuda cuda.SetFunctionBlockShape(cudaSumResidual, 64, 1, 1); // issue work to the GPU - cuda.LaunchAsync(cudaEstimateResidual, partCount, nResidualTasks / 8, cudaStream); + cuda.LaunchAsync(cudaEstimateResidual, partCount, nResidualTasks / threads_y, cudaStream); //cuda.LaunchAsync(cudaSumResidualChunks, partCount, nResidualTasks, cudaStream); cuda.LaunchAsync(cudaSumResidual, 1, nResidualTasks, cudaStream); cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream); @@ -1313,7 +1328,7 @@ namespace CUETools.Codecs.FlaCuda cudaWindow = cuda.Allocate((uint)sizeof(float) * FlaCudaWriter.MAX_BLOCKSIZE * 2 * lpc.MAX_LPC_WINDOWS); cudaAutocorTasks = cuda.Allocate((uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS)); cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * (lpc.MAX_LPC_ORDER + 1) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS) * maxAutocorParts); - cudaResidualTasks = cuda.Allocate((uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4))); + cudaResidualTasks = cuda.Allocate((uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_ORDER * (lpc.MAX_LPC_WINDOWS + 1))); cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4))); cudaResidualSums = cuda.Allocate((uint)(sizeof(int) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4) * maxResidualParts)); //cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4) * maxResidualParts)); @@ -1321,7 +1336,7 @@ namespace CUETools.Codecs.FlaCuda if (cuErr == CUResult.Success) cuErr = CUDADriver.cuMemAllocHost(ref autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS)); if (cuErr == CUResult.Success) - cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8))); + cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER)); if (cuErr != CUResult.Success) { if (samplesBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(samplesBufferPtr); samplesBufferPtr = IntPtr.Zero; diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu index b0a6c99..9da0637 100644 --- a/CUETools.FlaCuda/flacuda.cu +++ b/CUETools.FlaCuda/flacuda.cu @@ -155,7 +155,7 @@ extern "C" __global__ void cudaComputeLPC( if (tid < 32) { int precision = 13; - int taskNo = (blockIdx.x + blockIdx.y * gridDim.x) * ((max_order + 7) & ~7) + order; + int taskNo = (blockIdx.x + blockIdx.y * gridDim.x) * max_order + order; shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.tmp[tid]) * (1 << 15))) - precision), tid <= order); shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]); shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]); @@ -163,7 +163,8 @@ extern "C" __global__ void cudaComputeLPC( shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 2]); shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 1]); int sh = max(0,min(15, 15 - shared.bits[0])); - int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(-shared.tmp[tid] * (1 << sh)))); + // reverse coefs + int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(-shared.tmp[order - tid] * (1 << sh)))); if (tid <= order) output[taskNo].coefs[tid] = coef; if (tid == 0) @@ -194,64 +195,54 @@ extern "C" __global__ void cudaEstimateResidual( ) { __shared__ struct { - int data[256]; - int residual[256]; - int rice[256]; + int data[32*8]; + int residual[32*8]; encodeResidualTaskStruct task[8]; } shared; const int tid = threadIdx.x + threadIdx.y * blockDim.x; - // fetch task data (4 * 64 == 256 elements or 8 * 64 == 512 elements); - ((int*)&shared.task)[tid] = ((int*)(tasks + blockIdx.y * blockDim.y))[tid]; - ((int*)&shared.task)[tid + 256] = ((int*)(tasks + blockIdx.y * blockDim.y))[tid + 256]; + if (threadIdx.x < 16) + ((int*)&shared.task[threadIdx.y])[threadIdx.x] = ((int*)(&tasks[blockIdx.y * blockDim.y + threadIdx.y]))[threadIdx.x]; __syncthreads(); - const int partNumber = blockIdx.x; - const int pos = partNumber * partSize; + const int pos = blockIdx.x * partSize; const int dataLen = min(frameSize - pos, partSize + max_order); // fetch samples - shared.data[tid] = (tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] : 0); + shared.data[tid] = tid < dataLen ? samples[shared.task[0].samplesOffs + pos + tid] : 0; + shared.residual[tid] = 0; + const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize)) * (shared.task[threadIdx.y].residualOrder != 0); __syncthreads(); - //if (tid < blockDim.y) shared.sums[tid] = 0; - shared.rice[tid] = 0; - - // set upper residuals to zero, in case blockDim < 256 - //shared.residual[255 - tid] = 0; - - const int residualLen = max(0,min(frameSize - pos - shared.task[threadIdx.y].residualOrder, partSize)) * (shared.task[threadIdx.y].residualOrder != 0); - - // reverse coefs - if (threadIdx.x < shared.task[threadIdx.y].residualOrder) shared.task[threadIdx.y].coefs[threadIdx.x] = shared.task[threadIdx.y].coefs[shared.task[threadIdx.y].residualOrder - 1 - threadIdx.x]; + shared.task[threadIdx.y].coefs[threadIdx.x] = threadIdx.x < max_order ? tasks[blockIdx.y * blockDim.y + threadIdx.y].coefs[threadIdx.x] : 0; for (int i = threadIdx.x; i - threadIdx.x < residualLen; i += blockDim.x) // += 32 { - const int residualOrder = shared.task[threadIdx.y].residualOrder; // compute residual - long sum = 0; - for (int c = 0; c < residualOrder; c++) + int sum = 0; + int c = 0; + for (c = 0; c < shared.task[threadIdx.y].residualOrder; c++) sum += __mul24(shared.data[i + c], shared.task[threadIdx.y].coefs[c]); - int res = shared.data[i + residualOrder] - (sum >> shared.task[threadIdx.y].shift); - shared.residual[tid] = __mul24(i < residualLen, (2 * res) ^ (res >> 31)); - // enable this line when using blockDim.y == 4 - //__syncthreads(); if (threadIdx.x < 32) shared.residual[tid] += shared.residual[tid + 32]; __syncthreads(); - shared.residual[tid] += shared.residual[tid + 16]; - shared.residual[tid] += shared.residual[tid + 8]; - shared.residual[tid] += shared.residual[tid + 4]; - shared.residual[tid] += shared.residual[tid + 2]; - //if (threadIdx.x == 0) shared.sums[threadIdx.y] += shared.residual[tid] + shared.residual[tid + 1]; - shared.rice[tid] += shared.residual[tid] + shared.residual[tid + 1]; + sum = shared.data[i + c] - (sum >> shared.task[threadIdx.y].shift); + shared.residual[tid] += __mul24(i < residualLen, (sum << 1) ^ (sum >> 31)); } - + + // enable this line when using blockDim.x == 64 + //__syncthreads(); if (threadIdx.x < 32) shared.residual[tid] += shared.residual[tid + 32]; __syncthreads(); + shared.residual[tid] += shared.residual[tid + 16]; + shared.residual[tid] += shared.residual[tid + 8]; + shared.residual[tid] += shared.residual[tid + 4]; + shared.residual[tid] += shared.residual[tid + 2]; + shared.residual[tid] += shared.residual[tid + 1]; + // rice parameter search - //shared.rice[tid] = __mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.sums[threadIdx.y] - (residualLen >> 1)) >> threadIdx.x); - shared.rice[tid] = __mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.rice[threadIdx.y * blockDim.x] - (residualLen >> 1)) >> threadIdx.x); - shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 8]); - shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 4]); - shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 2]); - shared.rice[tid] = min(shared.rice[tid], shared.rice[tid + 1]); + shared.residual[tid] = __mul24(threadIdx.x >= 15, 0x7fffff) + residualLen * (threadIdx.x + 1) + ((shared.residual[threadIdx.y * blockDim.x] - (residualLen >> 1)) >> threadIdx.x); + __syncthreads(); + shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 8]); + shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 4]); + shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 2]); + shared.residual[tid] = min(shared.residual[tid], shared.residual[tid + 1]); if (threadIdx.x == 0 && shared.task[threadIdx.y].residualOrder != 0) - output[(blockIdx.y * blockDim.y + threadIdx.y) * gridDim.x + blockIdx.x] = shared.rice[tid]; + output[(blockIdx.y * blockDim.y + threadIdx.y) * gridDim.x + blockIdx.x] = shared.residual[tid]; } // blockDim.x == 256 diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin index 12103c0..f11e288 100644 --- a/CUETools.FlaCuda/flacuda.cubin +++ b/CUETools.FlaCuda/flacuda.cubin @@ -96,7 +96,7 @@ code { code { name = cudaEstimateResidual lmem = 0 - smem = 4648 + smem = 3624 reg = 10 bar = 1 const { @@ -105,92 +105,96 @@ code { offset = 0 bytes = 12 mem { - 0x000003ff 0x00000001 0x0000000e + 0x000003ff 0x0000000f 0x0000000e } } bincode { - 0x10004409 0x0023c780 0xd0800205 0x00400780 - 0xa000420d 0x04200780 0x40024e05 0x00200780 - 0xa0000211 0x04000780 0xa0000001 0x04000780 - 0x30070209 0xc4100780 0x30060205 0xc4100780 - 0x40080c1c 0x20018404 0x20008e18 0x2101ec08 - 0x30020c05 0xc4100780 0x20000205 0x04008780 - 0xd00e0209 0x80c00780 0x00020c05 0xc0000780 - 0x20008205 0x00000043 0xd00e0205 0x80c00780 - 0x04061401 0xe4208780 0x04081401 0xe4204780 - 0x861ffe03 0x00000000 0xa0004c09 0x04200780 - 0x1000d205 0x0423c780 0x4005040d 0x00000780 - 0x6004060d 0x0000c780 0x3010060d 0xc4100780 - 0x60040409 0x0000c780 0x2101ee0c 0x2142f004 - 0x3003020d 0xac000780 0x30030dfd 0x6c0187c8 - 0xa0025003 0x00000000 0x10024003 0x00000280 - 0xd0185805 0x20000780 0x2502e008 0x20028c08 - 0x30020409 0xc4100780 0x2000ca09 0x04208780 - 0xd00e0409 0x80c00780 0x10025003 0x00000780 - 0x1000f809 0x0403c780 0x00020c05 0xc0000782 - 0x04001401 0xe4208780 0x861ffe03 0x00000000 - 0x30070809 0xc4100780 0x3006080d 0xc4100780 - 0x20000409 0x0400c780 0x2028840d 0x00000003 - 0x00000605 0xc0000780 0x00020c09 0xc0000780 - 0x0000040d 0xc0000780 0x08041401 0xe43f0780 - 0xdc185009 0x20000780 0x3800c005 0x04204780 - 0x3001d205 0xac200780 0x387cc00d 0x6c208780 - 0x307c0205 0x8c000780 0x3800c1fd 0x6420c7c8 - 0xd0030215 0x04020780 0xa0044003 0x00000000 - 0x10044003 0x00000280 0xd4180009 0x20000780 - 0x2840c005 0x04200780 0x30020205 0xc4100780 - 0x20000405 0x04004780 0x00000209 0xc0000780 - 0x30020005 0xc4100780 0x20000405 0x04004780 - 0x0000020d 0xc0000780 0xd818c809 0x20000780 - 0x1800c005 0x0423c780 0x0c063401 0xe4204780 - 0x307c0bfd 0x640087ca 0xa007b003 0x00000000 - 0x10000005 0x0403c780 0x1007b003 0x00000280 - 0x00020c09 0xc0000780 0xd8105009 0x20000780 - 0x1800c009 0x0423c780 0xd4180009 0x20000780 - 0x387cc1fd 0x6c20c7c8 0xa005e003 0x00000000 - 0x1000f821 0x0403c780 0x1800c00d 0x0423c780 - 0x1005e003 0x00000280 0x1000f825 0x0403c780 - 0xd4000009 0x20000780 0x2000020d 0x04024780 - 0x0002060d 0xc0000780 0xd8188011 0x20000780 - 0x1000c00d 0x0423c784 0x6c03d421 0x80220780 - 0xd418000d 0x20000780 0x20019225 0x00000003 - 0x3c09c1fd 0x6c2147c8 0xd8000809 0x20000780 - 0x1c00c00d 0x0423c780 0x10053003 0x00000280 + 0xd0800205 0x00400780 0xa0000209 0x04000780 + 0x30070405 0xc4100780 0x3006040d 0xc4100780 + 0xa0000001 0x04000780 0x20000205 0x0400c780 + 0x30020015 0xc4100780 0x2000020d 0x04014780 + 0x308101fd 0x644107c8 0x00000205 0xc0000780 + 0x00000609 0xc0000780 0xa0015003 0x00000000 + 0x10015003 0x00000280 0x10004409 0x0023c780 + 0x60024e05 0x00208780 0x3007020d 0xc4100780 + 0x30060205 0xc4100780 0x20018604 0x2101ec04 + 0x20000a05 0x04004780 0xd00e0205 0x80c00780 + 0x08041401 0xe4204780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0xa0004c11 0x04200780 + 0x1000d205 0x0423c780 0x40080619 0x00000780 + 0xa000420d 0x04200780 0x30100c19 0xc4100780 + 0x60080411 0x00018780 0x40060a05 0x00000780 + 0x30100219 0xc4100780 0x1000d205 0x0423c780 + 0x60060819 0x00018780 0x2101ee0c 0x2144f004 + 0x20000c1d 0x04000780 0x3003020d 0xac000780 + 0x30030ffd 0x6c0187c8 0xd010580d 0x20000780 + 0x2c00c00d 0x04210500 0x20000e0d 0x0400c500 + 0x3002060d 0xc4100500 0x2000ca0d 0x0420c500 + 0xd00e060d 0x80c00500 0x1000f80d 0x0403c280 + 0x00020e0d 0xc0000780 0x0c001401 0xe420c780 + 0xd4105011 0x20000780 0x0c021401 0xe43f0780 + 0x3000c005 0x04204784 0x307cc00d 0x6c208784 + 0x3001d205 0xac200780 0x307c0205 0x8c000780 + 0xd0030211 0x04020780 0xd4005005 0x20000780 + 0x861ffe03 0x00000000 0x3000cffd 0x6420c7c8 + 0xa0044003 0x00000000 0x10043003 0x00000280 + 0x10004409 0x0023c780 0x60024e05 0x00208780 + 0x3007020d 0xc4100780 0x30060205 0xc4100780 + 0x20018604 0x2101ec04 0x20000a05 0x04004780 + 0x20008205 0x00000007 0xd00e0205 0x80c00780 + 0x10044003 0x00000780 0x1000f805 0x0403c780 + 0x307c09fd 0x640087ca 0x08043401 0xe4204780 + 0xa0076003 0x00000000 0x10000005 0x0403c780 + 0x10073003 0x00000280 0x00020e0d 0xc0000780 + 0xdc085009 0x20000780 0x1800c015 0x0423c780 + 0xd4100009 0x20000780 0x387cc1fd 0x6c20c7c8 + 0xa0060003 0x00000000 0x1000f80d 0x0403c780 + 0x1000f825 0x0403c780 0x10060003 0x00000280 + 0xa005e003 0x00000000 0xd4000009 0x20000780 + 0x20000221 0x0400c780 0x0002100d 0xc0000780 + 0xd8108011 0x20000780 0x1000c021 0x0423c784 + 0x6c08d425 0x80224780 0xd410000d 0x20000780 + 0x2001860d 0x00000003 0x3c03c1fd 0x6c2147c8 + 0xd8000809 0x20000780 0x10054003 0x00000280 + 0xd4100009 0x20000782 0x1800c00d 0x0423c780 0x2000020d 0x0400c782 0x00020609 0xc0000780 - 0xd418100d 0x20000780 0x1c00c00d 0x0423c780 - 0x3003100d 0xec000780 0x2840d40d 0x0420c780 + 0xd410100d 0x20000780 0x1c00c00d 0x0423c780 + 0x3003120d 0xec000780 0x2840d40d 0x0420c780 0x301f0621 0xec100780 0x3001060d 0xc4100780 - 0xd0031021 0x04008780 0x30010a0d 0x6c010780 - 0xd081060d 0x04400780 0x4008060d 0x00018780 - 0x00020c09 0xc0000780 0xd808580d 0x20000780 - 0x08021401 0xe420c780 0x2c00de0d 0x0420c780 - 0x08021401 0xe420c780 0x2c00ce0d 0x0420c780 - 0x08021401 0xe420c780 0x2c00c60d 0x0420c780 - 0x08021401 0xe420c780 0x2c00c20d 0x0420c780 - 0x08021401 0xe420c780 0x2d03e00c 0x20038408 + 0xd0031021 0x04008780 0x3001080d 0x6c010780 + 0xa000060d 0x2c014780 0x60080615 0x80014780 0xa000420d 0x04200780 0x20038204 0x2040820c - 0x30030bfd 0x640107c8 0x08041401 0xe4208780 - 0x1004b003 0x00000280 0xf0000001 0xe0000002 - 0x20018005 0x00000003 0x00020c0d 0xc0000780 - 0x40031409 0x00000780 0x60021609 0x00008780 - 0x30100409 0xc4100780 0x60021409 0x00008780 - 0x30820005 0x64410780 0x00020e09 0xc0000780 - 0xd8105009 0x20000780 0xa000020d 0x2c014780 - 0x30010a05 0xec100780 0x407f860d 0x0007ffff - 0x2941e004 0x20038408 0x30000205 0xec000780 - 0x20000205 0x04008780 0xdc105809 0x20000780 - 0x0c041401 0xe4204780 0x3801ce05 0xac200780 - 0x0c041401 0xe4204780 0x3801c605 0xac200780 - 0x0c041401 0xe4204780 0x3801c205 0xac200780 - 0x0c041401 0xe4204780 0x3801c00d 0xac200780 - 0x307c01fd 0x640147c8 0x0c041401 0xe420c780 - 0x30000003 0x00000280 0xd4180005 0x20000780 - 0x347cc1fd 0x6c2087c8 0x30000003 0x00000280 - 0x11002408 0x41022e04 0x20000201 0x04010780 - 0x40014805 0x00200780 0x30100205 0xc4100780 - 0x60004801 0x00204780 0xa0004c09 0x04200780 - 0x20000001 0x04008780 0x30020001 0xc4100780 - 0x2000c801 0x04200780 0xd00e000d 0xa0c00781 + 0x300309fd 0x640107c8 0x00020e0d 0xc0000780 + 0x0c021401 0xe4214780 0x1004c003 0x00000280 + 0x10076003 0x00000780 0x00020e0d 0xc0000780 + 0xdc085009 0x20000780 0x1800c015 0x0423c780 + 0x00020e0d 0xc0000782 0xdc085809 0x20000780 + 0x2800de05 0x04214780 0x0c021401 0xe4204780 + 0x2800ce05 0x04204780 0x0c021401 0xe4204780 + 0x2800c605 0x04204780 0x0c021401 0xe4204780 + 0x2800c20d 0x04204780 0x20018005 0x00000003 + 0x0c021401 0xe420c780 0x2903e00c 0x40031014 + 0x0c021401 0xe420c780 0x6002120d 0x00014780 + 0x30820021 0x64410780 0x00020c09 0xc0000780 + 0xd8085009 0x20000780 0x30100615 0xc4100780 + 0xa0001019 0x2c014780 0x3001080d 0xec100780 + 0x60021011 0x00014780 0x407f8c15 0x0007ffff + 0x2943e004 0x2005880c 0x30000205 0xec000780 + 0x20000205 0x0400c780 0x0c021401 0xe4204780 + 0x861ffe03 0x00000000 0x00020e0d 0xc0000780 + 0xdc085009 0x20000780 0x1800d005 0x0423c780 + 0x3801c005 0xac200780 0x0c021401 0xe4204780 + 0x3801c805 0xac200780 0x0c021401 0xe4204780 + 0x3801c405 0xac200780 0x0c021401 0xe4204780 + 0x3801c205 0xac200780 0x307c01fd 0x640147c8 + 0x0c021401 0xe4204780 0x30000003 0x00000280 + 0xd4100005 0x20000780 0x347cc1fd 0x6c2087c8 + 0x30000003 0x00000280 0x10004401 0x0023c780 + 0x60004e01 0x00208780 0x40014809 0x00200780 + 0x30100409 0xc4100780 0x60004801 0x00208780 + 0xa0004c11 0x04200780 0x20000001 0x04010780 + 0x30020001 0xc4100780 0x2000c801 0x04200780 + 0xd00e0005 0xa0c00781 } } code { @@ -281,11 +285,11 @@ code { segname = const segnum = 1 offset = 0 - bytes = 44 + bytes = 40 mem { 0x00000001 0x0000001f 0x7e800000 0x3f800000 - 0x0000000f 0x00001fff 0xffffe000 0xfffffff8 - 0x3e800000 0x00000020 0x0000009e + 0x0000000f 0x00001fff 0xffffe000 0x3e800000 + 0x00000020 0x0000009e } } bincode { @@ -314,93 +318,92 @@ code { 0x30810601 0x6c40c7d0 0xa00001fd 0x0c0147c8 0x00020605 0xc0001680 0x04001601 0xe43f1680 0xd0035805 0x20000780 0x307ccffd 0x6c20c7d8 - 0x1400c001 0x0423c780 0x30000003 0x00001280 - 0x10248005 0x00000003 0x00000205 0xc0000780 + 0x1400c005 0x0423c780 0x30000003 0x00001280 + 0x10248001 0x00000003 0x00000005 0xc0000780 0x30020611 0xc4100780 0x1000f815 0x0403c780 - 0x20400a05 0x0400c780 0x00020209 0xc0000780 + 0x20400a01 0x0400c780 0x00020009 0xc0000780 0xa004e003 0x00000000 0x30030bfd 0x6c00c7d8 0x1004e003 0x00000100 0xd8035811 0x20000780 - 0x0000080d 0xc0000780 0x1000c005 0x0423c784 - 0xcc01d605 0x00200780 0x1000f805 0x0403d280 - 0xdc016011 0x20000780 0x0c005601 0xe4204780 - 0xb000de05 0x00204784 0x0c005601 0xe4204780 - 0xb000ce05 0x00204784 0x0c005601 0xe4204780 - 0xb000c605 0x00204784 0x0c005601 0xe4204780 - 0xb000c205 0x00204784 0x0c005601 0xe4204780 - 0xb000c005 0x00204784 0x0c005601 0xe4204780 + 0x0000080d 0xc0000780 0x1000c001 0x0423c784 + 0xcc00d601 0x00200780 0x1000f801 0x0403d280 + 0xdc016011 0x20000780 0x0c005601 0xe4200780 + 0xb000de01 0x00200784 0x0c005601 0xe4200780 + 0xb000ce01 0x00200784 0x0c005601 0xe4200780 + 0xb000c601 0x00200784 0x0c005601 0xe4200780 + 0xb000c201 0x00200784 0x0c005601 0xe4200780 + 0xb000c001 0x00200784 0x0c005601 0xe4200780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xd0015811 0x20000780 0xd403180d 0x20000780 - 0x1000c005 0x0423c784 0xbc00c009 0x00204780 - 0xb08201fd 0x605107e8 0x10000005 0x0403c780 - 0xa0000409 0xe4004780 0xc0880409 0x00402680 - 0xc0880205 0x00402680 0x90000204 0xc0010418 + 0x1000c001 0x0423c784 0xbc00c009 0x00200780 + 0xb08203fd 0x605107e8 0x10000201 0x0403c780 + 0xa0000409 0xe4004780 0xc0870409 0x00402680 + 0xc0870001 0x00402680 0x90000000 0xc0000418 0xc806d409 0x00200780 0x1000f809 0x0403d280 - 0x30030bfd 0x6c0147d8 0xb0000c05 0x00008780 - 0x10000405 0x0403d280 0x00000809 0xc0000780 - 0xe1060c09 0x0440c780 0xb9017604 0xc0020000 - 0xa00d9003 0x00000000 0x08001601 0xe4204780 - 0x100d9003 0x00000100 0xa0000209 0xc4104780 - 0xc0000409 0x04700003 0xa0000409 0x8c0047d0 - 0xa0000409 0x44065680 0x30170409 0xec101680 - 0x31000409 0x04429680 0x10001209 0x2440d100 - 0x30030a19 0x6c0187d0 0x30148409 0x00000003 - 0xd0800c19 0x04400780 0x00000809 0xc0000780 - 0x40060409 0x00018780 0xd802600d 0x20000780 - 0x08009601 0xe4208780 0x3c02de09 0x8c200780 - 0x08009601 0xe4208780 0x3c02ce09 0x8c200780 - 0x08009601 0xe4208780 0x3c02c609 0x8c200780 - 0x08009601 0xe4208780 0x3c02c209 0x8c200780 - 0x08009601 0xe4208780 0x3c02c009 0x8c200780 - 0x08009601 0xe4208780 0xd0025809 0x20000780 - 0x390fe009 0x00000003 0x30840409 0xac400780 - 0x10018019 0x00000003 0x307c041d 0x8c000780 - 0x30070c09 0xc4000780 0xa0000409 0x44014780 - 0xc0010405 0x00000780 0xa0000205 0xac004780 - 0x30850205 0xac400780 0xa009a003 0x00000000 - 0x30860219 0x8c400780 0x1009a003 0x00001100 - 0xa0004c09 0x04200780 0x10004e09 0x0023c780 - 0x60024805 0x00208780 0x2107ee09 0x00000003 - 0xd0870409 0x04400780 0x40050421 0x00000780 - 0x60040621 0x00020780 0x30101021 0xc4100780 - 0x60040405 0x00020780 0x20000205 0x04014780 - 0x30070209 0xc4100780 0x30060205 0xc4100780 - 0x20018404 0x2101e804 0x20000805 0x04004780 - 0x20008205 0x00000007 0xd00e0219 0xa0c00780 - 0x307c0605 0x6c0087e2 0xa00003fd 0x0c0147d8 - 0xa00ad003 0x00000000 0x100ad003 0x00002100 - 0xa0004c09 0x04200780 0x10004e09 0x0023c780 - 0x60024805 0x00208780 0x2107ee09 0x00000003 - 0xd0870409 0x04400780 0x40050421 0x00000780 - 0x60040621 0x00020780 0x30101021 0xc4100780 - 0x60040405 0x00020780 0x20000205 0x04014780 - 0x30070209 0xc4100780 0x30060205 0xc4100780 - 0x20018404 0x2101e804 0x20088205 0x00000003 - 0xd00e021d 0xa0c00780 0x307c0dfd 0x6c0087ea - 0xa0000c05 0x44066500 0x30170205 0xec102500 - 0x31000205 0x0442a500 0x10001205 0x2440e280 + 0x30030bfd 0x6c0147d8 0xb0000c01 0x00008780 + 0x10000401 0x0403d280 0x0000080d 0xc0000780 + 0xe1060c09 0x0440c780 0xbd007600 0xc0020204 + 0xa00d6003 0x00000000 0x0c001601 0xe4200780 + 0x100d6003 0x00000100 0xa0000001 0xc4104780 + 0xc0000001 0x04700003 0xa0000001 0x8c0047d0 + 0xa0000001 0x44065680 0x30170001 0xec101680 + 0x31000001 0x04425680 0x10001001 0x2440d100 + 0x30030a09 0x6c0187d0 0x30148001 0x00000003 + 0xd0800409 0x04400780 0x0000080d 0xc0000780 + 0x40020001 0x00018780 0xdc026011 0x20000780 + 0x0c009601 0xe4200780 0x3000de01 0x8c200784 + 0x0c009601 0xe4200780 0x3000ce01 0x8c200784 + 0x0c009601 0xe4200780 0x3000c601 0x8c200784 + 0x0c009601 0xe4200780 0x3000c201 0x8c200784 + 0x0c009601 0xe4200780 0x3000c001 0x8c200784 + 0x0c009601 0xe4200780 0xd002580d 0x20000780 + 0x3d0fe001 0x00000003 0x30840001 0xac400780 + 0x10018009 0x00000003 0x307c001d 0x8c000780 + 0x30070401 0xc4000780 0xa0000001 0x44014780 + 0xc800d601 0x00200780 0xa0000001 0xac004780 + 0x30850001 0xac400780 0xa0099003 0x00000000 + 0x30860019 0x8c400780 0x10099003 0x00001100 + 0xa0004c09 0x04200780 0x10004e01 0x0023c780 + 0x60004809 0x00208780 0x1000ce01 0x0423c780 + 0x40050021 0x00000780 0x60040221 0x00020780 + 0x30101021 0xc4100780 0x60040001 0x00020780 + 0x20000001 0x04014780 0x30070009 0xc4100780 + 0x30060001 0xc4100780 0x20008400 0x2100e800 + 0x20000801 0x04000780 0x20008001 0x00000007 + 0xd00e0019 0xa0c00780 0x307c0601 0x6c0087e2 + 0xa00001fd 0x0c0147d8 0xa00ab003 0x00000000 + 0x100ab003 0x00002100 0xa0004c09 0x04200780 + 0x10004e01 0x0023c780 0x60004809 0x00208780 + 0x1000ce01 0x0423c780 0x40050021 0x00000780 + 0x60040221 0x00020780 0x30101021 0xc4100780 + 0x60040001 0x00020780 0x20000001 0x04014780 + 0x30070009 0xc4100780 0x30060001 0xc4100780 + 0x20008400 0x2100e800 0x20088001 0x00000003 + 0xd00e001d 0xa0c00780 0x307c0dfd 0x6c0087ea + 0xa0000c01 0x44066500 0x30170001 0xec102500 + 0x31000001 0x04426500 0x10001001 0x2440e280 0xd0060009 0x0402c780 0x307c05fd 0x6c0087e8 0xa0000409 0x44066500 0x30170409 0xec102500 - 0x31000409 0x0442a500 0x10001209 0x2440e280 - 0x30020205 0x8c000780 0x00000809 0xc0000780 - 0x30218205 0x00000003 0xd802600d 0x20000780 - 0x08009601 0xe4204780 0x3c01de05 0x8c200780 - 0x08009601 0xe4204780 0x3c01ce05 0x8c200780 - 0x08009601 0xe4204780 0x3c01c605 0x8c200780 - 0x08009601 0xe4204780 0x3c01c205 0x8c200780 - 0x08009601 0xe4204780 0x3c01c005 0x8c200780 - 0x08009601 0xe4204780 0x100d9003 0x00001100 - 0xa0004c09 0x04200780 0x10004e09 0x0023c780 - 0x2107ee19 0x00000003 0x60024805 0x00208780 - 0xd0870c09 0x04400780 0x40050419 0x00000780 - 0x60040619 0x00018780 0x30100c19 0xc4100780 - 0x60040405 0x00018780 0x20000205 0x04014780 - 0x30070209 0xc4100780 0x30060205 0xc4100780 - 0x20000405 0x04004780 0xd0025809 0x20000780 - 0x2101e808 0x1900e004 0x200c8409 0x00000003 - 0xd00e0405 0xa0c00780 0xf0000001 0xe0000002 - 0x861ffe03 0x00000000 0x20018a15 0x00000003 - 0x3005cffd 0x6c2147d8 0xd4000805 0x20000780 - 0x10038003 0x00001280 0xf0000001 0xe0000001 + 0x31000409 0x04426500 0x10001009 0x2440e280 + 0x30020001 0x8c000780 0x00000809 0xc0000780 + 0x30218001 0x00000003 0xd802600d 0x20000780 + 0x08009601 0xe4200780 0x3c00de01 0x8c200780 + 0x08009601 0xe4200780 0x3c00ce01 0x8c200780 + 0x08009601 0xe4200780 0x3c00c601 0x8c200780 + 0x08009601 0xe4200780 0x3c00c201 0x8c200780 + 0x08009601 0xe4200780 0x3c00c001 0x8c200780 + 0x08009601 0xe4200780 0x100d6003 0x00001100 + 0xa0004c09 0x04200780 0x10004e01 0x0023c780 + 0x60004809 0x00208780 0x1000ce01 0x0423c780 + 0x40050019 0x00000780 0x60040219 0x00018780 + 0x30100c19 0xc4100780 0x60040001 0x00018780 + 0x20000001 0x04014780 0x30070009 0xc4100780 + 0x30060001 0xc4100780 0x20000401 0x04000780 + 0xd0025809 0x20000780 0x2100e808 0x1900e000 + 0x200c8409 0x00000003 0xd00e0401 0xa0c00780 + 0xf0000001 0xe0000002 0x861ffe03 0x00000000 + 0x20018a15 0x00000003 0x3005cffd 0x6c2147d8 + 0xd4000805 0x20000780 0x10038003 0x00001280 + 0xf0000001 0xe0000001 } } code {