diff --git a/CUETools.FlaCuda/FlaCudaWriter.cs b/CUETools.FlaCuda/FlaCudaWriter.cs index ca7f7a5..5439834 100644 --- a/CUETools.FlaCuda/FlaCudaWriter.cs +++ b/CUETools.FlaCuda/FlaCudaWriter.cs @@ -73,6 +73,7 @@ namespace CUETools.Codecs.FlaCuda int[] verifyBuffer; int[] residualBuffer; float[] windowBuffer; + byte[] md5_buffer; int samplesInBuffer = 0; int _compressionLevel = 5; @@ -84,7 +85,7 @@ namespace CUETools.Codecs.FlaCuda Crc16 crc16; MD5 md5; - FlacFrame frame; + FlacFrame _frame; FlakeReader verify; SeekPoint[] seek_table; @@ -105,7 +106,6 @@ namespace CUETools.Codecs.FlaCuda CUdeviceptr cudaAutocorOutput; CUdeviceptr cudaResidualTasks; CUdeviceptr cudaResidualOutput; - CUdeviceptr cudaResidualSums; IntPtr samplesBufferPtr = IntPtr.Zero; IntPtr autocorTasksPtr = IntPtr.Zero; IntPtr residualTasksPtr = IntPtr.Zero; @@ -114,9 +114,10 @@ namespace CUETools.Codecs.FlaCuda int nResidualTasks = 0; int nAutocorTasks = 0; + int maxFrames = 8; - const int MAX_BLOCKSIZE = 8192; - const int maxResidualParts = 64; + const int MAX_BLOCKSIZE = 4608 * 4; + const int maxResidualParts = (MAX_BLOCKSIZE + 32 * 3) / (32 * 3); const int maxAutocorParts = MAX_BLOCKSIZE / (256 - 32); public FlaCudaWriter(string path, int bitsPerSample, int channelCount, int sampleRate, Stream IO) @@ -137,13 +138,14 @@ namespace CUETools.Codecs.FlaCuda residualBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * (channels == 2 ? 10 : channels + 1)]; windowBuffer = new float[FlaCudaWriter.MAX_BLOCKSIZE * 2 * lpc.MAX_LPC_WINDOWS]; + md5_buffer = new byte[FlaCudaWriter.MAX_BLOCKSIZE * channels * bits_per_sample / 8]; eparams.flake_set_defaults(_compressionLevel); eparams.padding_size = 8192; crc8 = new Crc8(); crc16 = new Crc16(); - frame = new FlacFrame(channels * 2); + _frame = new FlacFrame(channels * 2); } public int TotalSize @@ -193,11 +195,20 @@ namespace CUETools.Codecs.FlaCuda while (samplesInBuffer > 0) { eparams.block_size = samplesInBuffer; - output_frame(); + output_frames(); } if (_IO.CanSeek) { + if (sample_count == 0 && _position != 0) + { + BitWriter bitwriter = new BitWriter(header, 0, 4); + bitwriter.writebits(32, (int)_position); + bitwriter.flush(); + _IO.Position = 22; + _IO.Write(header, 0, 4); + } + if (md5 != null) { md5.TransformFinalBlock(frame_buffer, 0, 0); @@ -220,7 +231,6 @@ namespace CUETools.Codecs.FlaCuda cuda.Free(cudaAutocorOutput); cuda.Free(cudaResidualTasks); cuda.Free(cudaResidualOutput); - cuda.Free(cudaResidualSums); CUDADriver.cuMemFreeHost(samplesBufferPtr); CUDADriver.cuMemFreeHost(residualTasksPtr); CUDADriver.cuMemFreeHost(autocorTasksPtr); @@ -253,7 +263,6 @@ namespace CUETools.Codecs.FlaCuda cuda.Free(cudaAutocorOutput); cuda.Free(cudaResidualTasks); cuda.Free(cudaResidualOutput); - cuda.Free(cudaResidualSums); CUDADriver.cuMemFreeHost(samplesBufferPtr); CUDADriver.cuMemFreeHost(residualTasksPtr); CUDADriver.cuMemFreeHost(autocorTasksPtr); @@ -905,64 +914,67 @@ namespace CUETools.Codecs.FlaCuda _windowcount++; } - unsafe void initialize_autocorTasks(int channelsCount, int max_order) + unsafe void initialize_autocorTasks(int blocksize, int channelsCount, int max_order, int nFrames) { computeAutocorTaskStruct* autocorTasks = (computeAutocorTaskStruct*)autocorTasksPtr; encodeResidualTaskStruct* residualTasks = (encodeResidualTaskStruct*)residualTasksPtr; nAutocorTasks = 0; nResidualTasks = 0; - for (int ch = 0; ch < channelsCount; ch++) - for (int iWindow = 0; iWindow < _windowcount; iWindow++) + for (int iFrame = 0; iFrame < nFrames; iFrame++) + { + for (int ch = 0; ch < channelsCount; ch++) { - // Autocorelation task - autocorTasks[nAutocorTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE; - autocorTasks[nAutocorTasks].windowOffs = iWindow * 2 * FlaCudaWriter.MAX_BLOCKSIZE; - nAutocorTasks++; - // LPC tasks + for (int iWindow = 0; iWindow < _windowcount; iWindow++) + { + // Autocorelation task + autocorTasks[nAutocorTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; + autocorTasks[nAutocorTasks].windowOffs = iWindow * 2 * FlaCudaWriter.MAX_BLOCKSIZE; + autocorTasks[nAutocorTasks].residualOffs = max_order * (iWindow + (_windowcount + 1) * (ch + iFrame * channelsCount)); + autocorTasks[nAutocorTasks].blocksize = blocksize; + nAutocorTasks++; + // LPC tasks + for (int order = 1; order <= max_order; order++) + { + residualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0; + residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; + nResidualTasks++; + } + } + // Fixed prediction for (int order = 1; order <= max_order; order++) { - residualTasks[nResidualTasks].residualOrder = order <= max_order ? order : 0; - residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE; + residualTasks[nResidualTasks].residualOrder = order <= 4 ? order : 0; + residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * blocksize; + residualTasks[nResidualTasks].shift = 0; + switch (order) + { + case 5: + residualTasks[nResidualTasks].residualOrder = 1; + residualTasks[nResidualTasks].coefs[0] = 0; + break; + case 1: + residualTasks[nResidualTasks].coefs[0] = 1; + break; + case 2: + residualTasks[nResidualTasks].coefs[1] = 2; + residualTasks[nResidualTasks].coefs[0] = -1; + break; + case 3: + residualTasks[nResidualTasks].coefs[2] = 3; + residualTasks[nResidualTasks].coefs[1] = -3; + residualTasks[nResidualTasks].coefs[0] = 1; + break; + case 4: + residualTasks[nResidualTasks].coefs[3] = 4; + residualTasks[nResidualTasks].coefs[2] = -6; + residualTasks[nResidualTasks].coefs[1] = 4; + residualTasks[nResidualTasks].coefs[0] = -1; + break; + } nResidualTasks++; } } - // Fixed prediction - for (int ch = 0; ch < channelsCount; ch++) - { - for (int order = 1; order <= max_order; order++) - { - residualTasks[nResidualTasks].residualOrder = order <= 4 ? order : 0; - residualTasks[nResidualTasks].samplesOffs = ch * FlaCudaWriter.MAX_BLOCKSIZE; - residualTasks[nResidualTasks].shift = 0; - switch (order) - { - case 5: - residualTasks[nResidualTasks].residualOrder = 1; - residualTasks[nResidualTasks].coefs[0] = 0; - break; - case 1: - residualTasks[nResidualTasks].coefs[0] = 1; - break; - case 2: - residualTasks[nResidualTasks].coefs[1] = 2; - residualTasks[nResidualTasks].coefs[0] = -1; - break; - case 3: - residualTasks[nResidualTasks].coefs[2] = 3; - residualTasks[nResidualTasks].coefs[1] = -3; - residualTasks[nResidualTasks].coefs[0] = 1; - break; - case 4: - residualTasks[nResidualTasks].coefs[3] = 4; - residualTasks[nResidualTasks].coefs[2] = -6; - residualTasks[nResidualTasks].coefs[1] = 4; - residualTasks[nResidualTasks].coefs[0] = -1; - break; - } - nResidualTasks++; - } } - cuda.CopyHostToDeviceAsync(cudaAutocorTasks, autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * nAutocorTasks), cudaStream); cuda.CopyHostToDeviceAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream); cuda.SynchronizeStream(cudaStream); @@ -1004,7 +1016,7 @@ namespace CUETools.Codecs.FlaCuda } } - unsafe void select_best_methods(FlacFrame frame, int channelsCount, int max_order, int partCount) + unsafe void select_best_methods(FlacFrame frame, int channelsCount, int max_order, int iFrame) { encodeResidualTaskStruct* residualTasks = (encodeResidualTaskStruct*)residualTasksPtr; for (int ch = 0; ch < channelsCount; ch++) @@ -1037,7 +1049,7 @@ namespace CUETools.Codecs.FlaCuda { for (int order = 1; order <= max_order && order < frame.blocksize; order++) { - int index = (order - 1) + max_order * (iWindow + _windowcount * ch); + int index = (order - 1) + max_order * (iWindow + (_windowcount + 1) * (ch + iFrame * channelsCount)); int cbits = residualTasks[index].cbits; int nbits = order * (int)frame.subframes[ch].obits + 4 + 5 + order * cbits + 6 + residualTasks[index].size; if (residualTasks[index].residualOrder != order) @@ -1062,7 +1074,7 @@ namespace CUETools.Codecs.FlaCuda { for (int order = 1; order <= 5 && order <= max_order && order < frame.blocksize; order++) { - int index = (order - 1) + max_order * (ch + _windowcount * channelsCount); + int index = (order - 1) + max_order * (_windowcount + (_windowcount + 1) * (ch + iFrame * channelsCount)); int forder = order == 5 ? 0 : order; int nbits = forder * (int)frame.subframes[ch].obits + 6 + residualTasks[index].size; if (residualTasks[index].residualOrder != (order == 5 ? 1 : order)) @@ -1077,15 +1089,14 @@ namespace CUETools.Codecs.FlaCuda } } - unsafe void estimate_residual(FlacFrame frame, int channelsCount, int max_order, int autocorPartCount, out int partCount) + unsafe void estimate_residual(int blocksize, int channelsCount, int max_order, int nFrames) { - if (frame.blocksize <= 4) - { - partCount = 0; + if (blocksize <= 4) return; - } - uint cbits = get_precision(frame.blocksize) + 1; + compute_autocorellation(blocksize, channelsCount, max_order, nFrames); + + uint cbits = get_precision(blocksize) + 1; int threads_y; if (max_order >= 4 && max_order <= 8) threads_y = max_order; @@ -1102,8 +1113,7 @@ namespace CUETools.Codecs.FlaCuda else throw new Exception("invalid LPC order"); int partSize = 32 * (threads_y - 1); - - partCount = (frame.blocksize + partSize - 1) / partSize; + int partCount = (blocksize + partSize - 1) / partSize; if (partCount > maxResidualParts) throw new Exception("invalid combination of block size and LPC order"); @@ -1112,19 +1122,11 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 1, (uint)cudaSamples.Pointer); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 2, (uint)cudaResidualTasks.Pointer); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 3, (uint)max_order); - cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 4, (uint)frame.blocksize); + cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 4, (uint)blocksize); cuda.SetParameter(cudaEstimateResidual, sizeof(uint) * 5, (uint)partSize); cuda.SetParameterSize(cudaEstimateResidual, sizeof(uint) * 6); cuda.SetFunctionBlockShape(cudaEstimateResidual, 32, threads_y, 1); - //cuda.SetParameter(cudaSumResidualChunks, 0, (uint)cudaResidualSums.Pointer); - //cuda.SetParameter(cudaSumResidualChunks, sizeof(uint), (uint)cudaResidualTasks.Pointer); - //cuda.SetParameter(cudaSumResidualChunks, sizeof(uint) * 2, (uint)cudaResidualOutput.Pointer); - //cuda.SetParameter(cudaSumResidualChunks, sizeof(uint) * 3, (uint)frame.blocksize); - //cuda.SetParameter(cudaSumResidualChunks, sizeof(uint) * 4, (uint)partSize); - //cuda.SetParameterSize(cudaSumResidualChunks, sizeof(uint) * 5U); - //cuda.SetFunctionBlockShape(cudaSumResidualChunks, residualThreads, 1, 1); - cuda.SetParameter(cudaSumResidual, 0, (uint)cudaResidualTasks.Pointer); cuda.SetParameter(cudaSumResidual, sizeof(uint), (uint)cudaResidualOutput.Pointer); cuda.SetParameter(cudaSumResidual, sizeof(uint) * 2, (uint)partSize); @@ -1133,24 +1135,24 @@ namespace CUETools.Codecs.FlaCuda cuda.SetFunctionBlockShape(cudaSumResidual, 64, 1, 1); // issue work to the GPU - cuda.LaunchAsync(cudaEstimateResidual, partCount, nResidualTasks / threads_y, cudaStream); + cuda.LaunchAsync(cudaEstimateResidual, partCount, (nResidualTasks / threads_y * nFrames) / maxFrames, cudaStream); //cuda.LaunchAsync(cudaSumResidualChunks, partCount, nResidualTasks, cudaStream); - cuda.LaunchAsync(cudaSumResidual, 1, nResidualTasks, cudaStream); - cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream); + cuda.LaunchAsync(cudaSumResidual, 1, (nResidualTasks * nFrames) / maxFrames, cudaStream); + cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * ((nResidualTasks * nFrames) / maxFrames)), cudaStream); cuda.SynchronizeStream(cudaStream); } - unsafe void compute_autocorellation(FlacFrame frame, int channelsCount, int max_order, out int partCount) + unsafe void compute_autocorellation(int blocksize, int channelsCount, int max_order, int nFrames) { int autocorThreads = 256; int partSize = 2 * autocorThreads - max_order; partSize &= 0xffffff0; - partCount = (frame.blocksize + partSize - 1) / partSize; + int partCount = (blocksize + partSize - 1) / partSize; if (partCount > maxAutocorParts) throw new Exception("internal error"); - if (frame.blocksize <= 4) + if (blocksize <= 4) return; cuda.SetParameter(cudaComputeAutocor, 0, (uint)cudaAutocorOutput.Pointer); @@ -1158,7 +1160,7 @@ namespace CUETools.Codecs.FlaCuda cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 2, (uint)cudaWindow.Pointer); cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 3, (uint)cudaAutocorTasks.Pointer); cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 4, (uint)max_order); - cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 4 + sizeof(uint), (uint)frame.blocksize); + cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 4 + sizeof(uint), (uint)blocksize); cuda.SetParameter(cudaComputeAutocor, sizeof(uint) * 4 + sizeof(uint) * 2, (uint)partSize); cuda.SetParameterSize(cudaComputeAutocor, (uint)(sizeof(uint) * 4) + sizeof(uint) * 3); cuda.SetFunctionBlockShape(cudaComputeAutocor, autocorThreads, 1, 1); @@ -1173,50 +1175,26 @@ namespace CUETools.Codecs.FlaCuda // issue work to the GPU cuda.CopyHostToDeviceAsync(cudaSamples, samplesBufferPtr, (uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelsCount), cudaStream); - cuda.LaunchAsync(cudaComputeAutocor, partCount, nAutocorTasks, cudaStream); - cuda.LaunchAsync(cudaComputeLPC, 1, nAutocorTasks, cudaStream); + cuda.LaunchAsync(cudaComputeAutocor, partCount, (nAutocorTasks * nFrames) / maxFrames, cudaStream); + cuda.LaunchAsync(cudaComputeLPC, 1, (nAutocorTasks * nFrames) / maxFrames, cudaStream); //cuda.SynchronizeStream(cudaStream); //cuda.CopyDeviceToHostAsync(cudaResidualTasks, residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * nResidualTasks), cudaStream1); } - unsafe int encode_frame(out int size) + unsafe int encode_frame(bool doMidside, int channelCount, int iFrame) { - int* s = (int*)samplesBufferPtr; fixed (int* r = residualBuffer) - fixed (float* window = windowBuffer) { + FlacFrame frame = _frame; frame.InitSize(eparams.block_size, eparams.variable_block_size != 0); - - bool doMidside = channels == 2 && eparams.do_midside; - int channelCount = doMidside ? 2 * channels : channels; - - if (frame.blocksize != _windowsize && frame.blocksize > 4) + for (int ch = 0; ch < channelCount; ch++) { - _windowsize = frame.blocksize; - _windowcount = 0; - calculate_window(window, lpc.window_welch, WindowFunction.Welch); - calculate_window(window, lpc.window_tukey, WindowFunction.Tukey); - calculate_window(window, lpc.window_hann, WindowFunction.Hann); - calculate_window(window, lpc.window_flattop, WindowFunction.Flattop); - calculate_window(window, lpc.window_bartlett, WindowFunction.Bartlett); - if (_windowcount == 0) - throw new Exception("invalid windowfunction"); - cuda.CopyHostToDevice(cudaWindow, windowBuffer); - initialize_autocorTasks(channelCount, eparams.max_prediction_order); + int* s = ((int*)samplesBufferPtr) + ch * FlaCudaWriter.MAX_BLOCKSIZE + iFrame * eparams.block_size; + frame.subframes[ch].Init(s, r + ch * FlaCudaWriter.MAX_BLOCKSIZE, + bits_per_sample + (doMidside && ch == 3 ? 1U : 0U), 0);// get_wasted_bits(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, frame.blocksize)); } - if (doMidside) - channel_decorrelation(s, s + FlaCudaWriter.MAX_BLOCKSIZE, s + 2 * FlaCudaWriter.MAX_BLOCKSIZE, s + 3 * FlaCudaWriter.MAX_BLOCKSIZE, frame.blocksize); - - frame.window_buffer = window; - for (int ch = 0; ch < channelCount; ch++) - frame.subframes[ch].Init(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, r + ch * FlaCudaWriter.MAX_BLOCKSIZE, - bits_per_sample + (doMidside && ch == 3 ? 1U : 0U), get_wasted_bits(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, frame.blocksize)); - - int autocorPartCount, residualPartCount; - compute_autocorellation(frame, channelCount, eparams.max_prediction_order, out autocorPartCount); - estimate_residual(frame, channelCount, eparams.max_prediction_order, autocorPartCount, out residualPartCount); - select_best_methods(frame, channelCount, eparams.max_prediction_order, residualPartCount); + select_best_methods(frame, channelCount, eparams.max_prediction_order, iFrame); if (doMidside) { @@ -1241,66 +1219,101 @@ namespace CUETools.Codecs.FlaCuda else frame_count++; } - size = frame.blocksize; return bitwriter.Length; } } - unsafe int output_frame() + unsafe int output_frames() { + bool doMidside = channels == 2 && eparams.do_midside; + int channelCount = doMidside ? 2 * channels : channels; + int nFrames = Math.Min(samplesInBuffer / eparams.block_size, maxFrames); + + if (nFrames < 1) + throw new Exception("oops"); + if (verify != null) { int* r = (int*)samplesBufferPtr; fixed (int* s = verifyBuffer) for (int ch = 0; ch < channels; ch++) - AudioSamples.MemCpy(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, r + ch * FlaCudaWriter.MAX_BLOCKSIZE, eparams.block_size); + AudioSamples.MemCpy(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, r + ch * FlaCudaWriter.MAX_BLOCKSIZE, samplesInBuffer); } - int fs, bs; - //if (0 != eparams.variable_block_size && 0 == (eparams.block_size & 7) && eparams.block_size >= 128) - // fs = encode_frame_vbs(); - //else - fs = encode_frame(out bs); - - if (seek_table != null && _IO.CanSeek) - { - for (int sp = 0; sp < seek_table.Length; sp++) + if (eparams.block_size != _windowsize && eparams.block_size > 4) + fixed (float* window = windowBuffer) { - if (seek_table[sp].framesize != 0) - continue; - if (seek_table[sp].number > (ulong)_position + (ulong)bs) - break; - if (seek_table[sp].number >= (ulong)_position) + _windowsize = eparams.block_size; + _windowcount = 0; + calculate_window(window, lpc.window_welch, WindowFunction.Welch); + calculate_window(window, lpc.window_tukey, WindowFunction.Tukey); + calculate_window(window, lpc.window_hann, WindowFunction.Hann); + calculate_window(window, lpc.window_flattop, WindowFunction.Flattop); + calculate_window(window, lpc.window_bartlett, WindowFunction.Bartlett); + if (_windowcount == 0) + throw new Exception("invalid windowfunction"); + cuda.CopyHostToDevice(cudaWindow, windowBuffer); + initialize_autocorTasks(eparams.block_size, channelCount, eparams.max_prediction_order, maxFrames); + } + + if (doMidside) + { + int* s = ((int*)samplesBufferPtr); + channel_decorrelation(s, s + FlaCudaWriter.MAX_BLOCKSIZE, + s + 2 * FlaCudaWriter.MAX_BLOCKSIZE, s + 3 * FlaCudaWriter.MAX_BLOCKSIZE, eparams.block_size * nFrames); + } + + estimate_residual(eparams.block_size, channelCount, eparams.max_prediction_order, nFrames); + + int bs = 0; + for (int iFrame = 0; iFrame < nFrames; iFrame++) + { + //if (0 != eparams.variable_block_size && 0 == (eparams.block_size & 7) && eparams.block_size >= 128) + // fs = encode_frame_vbs(); + //else + int fs = encode_frame(doMidside, channelCount, iFrame); + bs += eparams.block_size; + + if (seek_table != null && _IO.CanSeek) + { + for (int sp = 0; sp < seek_table.Length; sp++) { - seek_table[sp].number = (ulong)_position; - seek_table[sp].offset = (ulong)(_IO.Position - first_frame_offset); - seek_table[sp].framesize = (uint)bs; + if (seek_table[sp].framesize != 0) + continue; + if (seek_table[sp].number > (ulong)_position + (ulong)eparams.block_size) + break; + if (seek_table[sp].number >= (ulong)_position) + { + seek_table[sp].number = (ulong)_position; + seek_table[sp].offset = (ulong)(_IO.Position - first_frame_offset); + seek_table[sp].framesize = (uint)eparams.block_size; + } + } + } + + _position += eparams.block_size; + _IO.Write(frame_buffer, 0, fs); + _totalSize += fs; + + if (verify != null) + { + int decoded = verify.DecodeFrame(frame_buffer, 0, fs); + if (decoded != fs || verify.Remaining != (ulong)eparams.block_size) + throw new Exception("validation failed!"); + fixed (int* s = verifyBuffer, r = verify.Samples) + { + for (int ch = 0; ch < channels; ch++) + if (AudioSamples.MemCmp(s + iFrame * eparams.block_size + ch * FlaCudaWriter.MAX_BLOCKSIZE, r + ch * Flake.MAX_BLOCKSIZE, eparams.block_size)) + throw new Exception("validation failed!"); } } } - - _position += bs; - _IO.Write(frame_buffer, 0, fs); - _totalSize += fs; - - if (verify != null) - { - int decoded = verify.DecodeFrame(frame_buffer, 0, fs); - if (decoded != fs || verify.Remaining != (ulong)bs) - throw new Exception("validation failed!"); - fixed (int* s = verifyBuffer, r = verify.Samples) - { - for (int ch = 0; ch < channels; ch++) - if (AudioSamples.MemCmp(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, r + ch * Flake.MAX_BLOCKSIZE, bs)) - throw new Exception("validation failed!"); - } - } - - if (bs < eparams.block_size) + + if (bs < samplesInBuffer) { int* s = (int*)samplesBufferPtr; for (int ch = 0; ch < channels; ch++) - AudioSamples.MemCpy(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, s + bs + ch * FlaCudaWriter.MAX_BLOCKSIZE, eparams.block_size - bs); + AudioSamples.MemCpy(s + ch * FlaCudaWriter.MAX_BLOCKSIZE, s + bs + ch * FlaCudaWriter.MAX_BLOCKSIZE, samplesInBuffer - bs); } samplesInBuffer -= bs; @@ -1326,17 +1339,16 @@ namespace CUETools.Codecs.FlaCuda cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual"); cudaSamples = cuda.Allocate((uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * (channels == 2 ? 4 : channels))); cudaWindow = cuda.Allocate((uint)sizeof(float) * FlaCudaWriter.MAX_BLOCKSIZE * 2 * lpc.MAX_LPC_WINDOWS); - cudaAutocorTasks = cuda.Allocate((uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS)); - cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * (lpc.MAX_LPC_ORDER + 1) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS) * maxAutocorParts); - cudaResidualTasks = cuda.Allocate((uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_ORDER * (lpc.MAX_LPC_WINDOWS + 1))); - cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4))); - cudaResidualSums = cuda.Allocate((uint)(sizeof(int) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4) * maxResidualParts)); + cudaAutocorTasks = cuda.Allocate((uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS * maxFrames)); + cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS * (lpc.MAX_LPC_ORDER + 1) * maxAutocorParts)); + cudaResidualTasks = cuda.Allocate((uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_ORDER * (lpc.MAX_LPC_WINDOWS + 1) * maxFrames)); + cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER * maxResidualParts)); //cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 4) * maxResidualParts)); CUResult cuErr = CUDADriver.cuMemAllocHost(ref samplesBufferPtr, (uint)(sizeof(int) * (channels == 2 ? 4 : channels) * FlaCudaWriter.MAX_BLOCKSIZE)); if (cuErr == CUResult.Success) - cuErr = CUDADriver.cuMemAllocHost(ref autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS)); + cuErr = CUDADriver.cuMemAllocHost(ref autocorTasksPtr, (uint)(sizeof(computeAutocorTaskStruct) * (channels == 2 ? 4 : channels) * lpc.MAX_LPC_WINDOWS * maxFrames)); if (cuErr == CUResult.Success) - cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER)); + cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)(sizeof(encodeResidualTaskStruct) * (channels == 2 ? 4 : channels) * (lpc.MAX_LPC_WINDOWS + 1) * lpc.MAX_LPC_ORDER * maxFrames)); if (cuErr != CUResult.Success) { if (samplesBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(samplesBufferPtr); samplesBufferPtr = IntPtr.Zero; @@ -1358,21 +1370,21 @@ namespace CUETools.Codecs.FlaCuda int len = sampleCount; while (len > 0) { - int block = Math.Min(len, eparams.block_size - samplesInBuffer); + int block = Math.Min(len, FlaCudaWriter.MAX_BLOCKSIZE - samplesInBuffer); copy_samples(buff, pos, block); if (md5 != null) { - AudioSamples.FLACSamplesToBytes(buff, pos, frame_buffer, 0, block, channels, (int)bits_per_sample); - md5.TransformBlock(frame_buffer, 0, block * channels * ((int)bits_per_sample >> 3), null, 0); + AudioSamples.FLACSamplesToBytes(buff, pos, md5_buffer, 0, block, channels, (int)bits_per_sample); + md5.TransformBlock(md5_buffer, 0, block * channels * ((int)bits_per_sample >> 3), null, 0); } len -= block; pos += block; while (samplesInBuffer >= eparams.block_size) - output_frame(); + output_frames(); } } @@ -1584,7 +1596,7 @@ namespace CUETools.Codecs.FlaCuda else max_frame_size = 16 + ((eparams.block_size * channels * (int)bits_per_sample + 7) >> 3); - if (_IO.CanSeek && eparams.do_seektable) + if (_IO.CanSeek && eparams.do_seektable && sample_count != 0) { int seek_points_distance = sample_rate * 10; int num_seek_points = 1 + sample_count / seek_points_distance; // 1 seek point per 10 seconds @@ -1729,24 +1741,24 @@ namespace CUETools.Codecs.FlaCuda case 0: do_midside = false; window_function = WindowFunction.Bartlett; - max_prediction_order = 6; max_partition_order = 4; + max_prediction_order = 6; break; case 1: do_midside = false; window_function = WindowFunction.Bartlett; + max_partition_order = 4; max_prediction_order = 8; - max_partition_order = 6; break; case 2: - do_midside = false; - max_partition_order = 6; - max_prediction_order = 8; + window_function = WindowFunction.Bartlett; + max_partition_order = 4; + max_prediction_order = 4; break; case 3: window_function = WindowFunction.Bartlett; max_partition_order = 4; - max_prediction_order = 4; + max_prediction_order = 5; break; case 4: window_function = WindowFunction.Bartlett; @@ -1758,7 +1770,7 @@ namespace CUETools.Codecs.FlaCuda max_prediction_order = 8; break; case 6: - window_function = WindowFunction.Bartlett; + max_prediction_order = 8; break; case 7: max_prediction_order = 10; @@ -1784,6 +1796,8 @@ namespace CUETools.Codecs.FlaCuda { public int samplesOffs; public int windowOffs; + public int residualOffs; + public int blocksize; }; unsafe struct encodeResidualTaskStruct diff --git a/CUETools.FlaCuda/flacuda.cu b/CUETools.FlaCuda/flacuda.cu index 9da0637..37ae710 100644 --- a/CUETools.FlaCuda/flacuda.cu +++ b/CUETools.FlaCuda/flacuda.cu @@ -24,6 +24,8 @@ typedef struct { int samplesOffs; int windowOffs; + int residualOffs; + int blocksize; } computeAutocorTaskStruct; typedef struct @@ -155,7 +157,7 @@ extern "C" __global__ void cudaComputeLPC( if (tid < 32) { int precision = 13; - int taskNo = (blockIdx.x + blockIdx.y * gridDim.x) * max_order + order; + int taskNo = shared.task.residualOffs + order; shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.tmp[tid]) * (1 << 15))) - precision), tid <= order); shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]); shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]); diff --git a/CUETools.FlaCuda/flacuda.cubin b/CUETools.FlaCuda/flacuda.cubin index f11e288..73fec60 100644 --- a/CUETools.FlaCuda/flacuda.cubin +++ b/CUETools.FlaCuda/flacuda.cubin @@ -4,7 +4,7 @@ modname {cubin} code { name = cudaComputeAutocor lmem = 0 - smem = 3256 + smem = 3264 reg = 10 bar = 1 const { @@ -13,14 +13,14 @@ code { offset = 0 bytes = 16 mem { - 0x00000001 0x0000007f 0x0000003f 0x0000001f + 0x00000003 0x0000007f 0x0000003f 0x0000001f } } bincode { 0xa0000009 0x04000780 0x308005fd 0x644107c8 0xa000b003 0x00000000 0x3002040d 0xc4100780 0x1000b003 0x00000280 0xa0004e01 0x04200780 - 0x30030001 0xc4100780 0x2100ee00 0x20008600 + 0x30040001 0xc4100780 0x2100ee00 0x20008600 0xd00e0001 0x80c00780 0x00000605 0xc0000780 0x04065801 0xe4200780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 0xa0004c05 0x04200780 @@ -278,132 +278,118 @@ code { code { name = cudaComputeLPC lmem = 0 - smem = 564 + smem = 572 reg = 9 bar = 1 const { segname = const segnum = 1 offset = 0 - bytes = 40 + bytes = 48 mem { - 0x00000001 0x0000001f 0x7e800000 0x3f800000 - 0x0000000f 0x00001fff 0xffffe000 0x3e800000 - 0x00000020 0x0000009e + 0x00000003 0x0000001f 0x7e800000 0x3f800000 + 0x00000001 0x0000000f 0x00001fff 0xffffe000 + 0x3e800000 0x00000020 0x0000009e 0x00000008 } } bincode { - 0xa000000d 0x04000780 0x308007fd 0x644107c8 + 0xa0000009 0x04000780 0x308005fd 0x644107c8 0xa000b003 0x00000000 0x1000b003 0x00000280 - 0xa0004e01 0x04200780 0x30030001 0xc4100780 - 0x30020605 0xc4100780 0x2100ec00 0x20008200 - 0xd00e0001 0x80c00780 0x00020605 0xc0000780 - 0x04001201 0xe4200780 0x3003ce01 0x6c2187d2 - 0xa00001fd 0x0c0147c8 0x00020605 0xc0001680 - 0x0400d601 0xe43f1680 0x861ffe03 0x00000000 + 0xa0004e01 0x04200780 0x30040001 0xc4100780 + 0x30020405 0xc4100780 0x2100ec00 0x20008200 + 0xd00e0001 0x80c00780 0x00020405 0xc0000780 + 0x04001201 0xe4200780 0x3002ce01 0x6c2187d2 + 0xa00001fd 0x0c0147c8 0x00020405 0xc0001680 + 0x0400da01 0xe43f1680 0x861ffe03 0x00000000 0x307cd1fd 0x6c20c7d8 0x1002b003 0x00001280 0x1000f805 0x0403c780 0xa0027003 0x00000000 0x10027003 0x00000100 0x1000d001 0x0423c780 - 0x40014e09 0x00200780 0x30100409 0xc4100780 - 0x60004e09 0x00208780 0x2101ee01 0x00000003 - 0x20000409 0x04004780 0x40010811 0x00000780 - 0x60000a11 0x00010780 0x30100811 0xc4100780 - 0x60000801 0x00010780 0x20000601 0x04000780 - 0x30020001 0xc4100780 0x00020605 0xc0000780 + 0x40014e0d 0x00200780 0x3010060d 0xc4100780 + 0x60004e0d 0x0020c780 0x2101ee01 0x00000003 + 0x2000060d 0x04004780 0x40010c11 0x00000780 + 0x60000e11 0x00010780 0x30100811 0xc4100780 + 0x60000c01 0x00010780 0x20000401 0x04000780 + 0x30020001 0xc4100780 0x00020405 0xc0000780 0x2000ca01 0x04200780 0xd00e0001 0x80c00780 - 0xd4035809 0x20000780 0xb800c001 0x00200780 - 0x0400d601 0xe4200780 0xf0000001 0xe0000002 + 0xd4036809 0x20000780 0xb800c001 0x00200780 + 0x0400da01 0xe4200780 0xf0000001 0xe0000002 0x20018205 0x00000003 0x3001d1fd 0x6c2147d8 0x10013003 0x00001280 0x861ffe03 0x00000000 - 0x30810601 0x6c40c7d0 0xa00001fd 0x0c0147c8 - 0x00020605 0xc0001680 0x04001601 0xe43f1680 - 0xd0035805 0x20000780 0x307ccffd 0x6c20c7d8 - 0x1400c005 0x0423c780 0x30000003 0x00001280 - 0x10248001 0x00000003 0x00000005 0xc0000780 - 0x30020611 0xc4100780 0x1000f815 0x0403c780 - 0x20400a01 0x0400c780 0x00020009 0xc0000780 - 0xa004e003 0x00000000 0x30030bfd 0x6c00c7d8 - 0x1004e003 0x00000100 0xd8035811 0x20000780 - 0x0000080d 0xc0000780 0x1000c001 0x0423c784 - 0xcc00d601 0x00200780 0x1000f801 0x0403d280 - 0xdc016011 0x20000780 0x0c005601 0xe4200780 - 0xb000de01 0x00200784 0x0c005601 0xe4200780 - 0xb000ce01 0x00200784 0x0c005601 0xe4200780 - 0xb000c601 0x00200784 0x0c005601 0xe4200780 - 0xb000c201 0x00200784 0x0c005601 0xe4200780 - 0xb000c001 0x00200784 0x0c005601 0xe4200780 + 0x30810401 0x6c40c7d0 0xa00001fd 0x0c0147c8 + 0x00020405 0xc0001680 0x04001a01 0xe43f1680 + 0xd0036805 0x20000780 0x307ccffd 0x6c20c7d8 + 0x1400c001 0x0423c780 0x30000003 0x00001280 + 0x10248005 0x00000003 0x00000205 0xc0000780 + 0x3002040d 0xc4100780 0x1000f811 0x0403c780 + 0x20400805 0x04008780 0x00020209 0xc0000780 + 0xa004e003 0x00000000 0x300209fd 0x6c00c7d8 + 0x1004e003 0x00000100 0xd8036811 0x20000780 + 0x0000060d 0xc0000780 0x1000c005 0x0423c784 + 0xcc01da05 0x00200780 0x1000f805 0x0403d280 + 0xdc017011 0x20000780 0x0c005a01 0xe4204780 + 0xb000de05 0x00204784 0x0c005a01 0xe4204780 + 0xb000ce05 0x00204784 0x0c005a01 0xe4204780 + 0xb000c605 0x00204784 0x0c005a01 0xe4204780 + 0xb000c205 0x00204784 0x0c005a01 0xe4204780 + 0xb000c005 0x00204784 0x0c005a01 0xe4204780 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0xd0015811 0x20000780 0xd403180d 0x20000780 - 0x1000c001 0x0423c784 0xbc00c009 0x00200780 - 0xb08203fd 0x605107e8 0x10000201 0x0403c780 - 0xa0000409 0xe4004780 0xc0870409 0x00402680 - 0xc0870001 0x00402680 0x90000000 0xc0000418 - 0xc806d409 0x00200780 0x1000f809 0x0403d280 - 0x30030bfd 0x6c0147d8 0xb0000c01 0x00008780 - 0x10000401 0x0403d280 0x0000080d 0xc0000780 - 0xe1060c09 0x0440c780 0xbd007600 0xc0020204 - 0xa00d6003 0x00000000 0x0c001601 0xe4200780 - 0x100d6003 0x00000100 0xa0000001 0xc4104780 - 0xc0000001 0x04700003 0xa0000001 0x8c0047d0 - 0xa0000001 0x44065680 0x30170001 0xec101680 - 0x31000001 0x04425680 0x10001001 0x2440d100 - 0x30030a09 0x6c0187d0 0x30148001 0x00000003 - 0xd0800409 0x04400780 0x0000080d 0xc0000780 - 0x40020001 0x00018780 0xdc026011 0x20000780 - 0x0c009601 0xe4200780 0x3000de01 0x8c200784 - 0x0c009601 0xe4200780 0x3000ce01 0x8c200784 - 0x0c009601 0xe4200780 0x3000c601 0x8c200784 - 0x0c009601 0xe4200780 0x3000c201 0x8c200784 - 0x0c009601 0xe4200780 0x3000c001 0x8c200784 - 0x0c009601 0xe4200780 0xd002580d 0x20000780 - 0x3d0fe001 0x00000003 0x30840001 0xac400780 - 0x10018009 0x00000003 0x307c001d 0x8c000780 - 0x30070401 0xc4000780 0xa0000001 0x44014780 - 0xc800d601 0x00200780 0xa0000001 0xac004780 - 0x30850001 0xac400780 0xa0099003 0x00000000 - 0x30860019 0x8c400780 0x10099003 0x00001100 - 0xa0004c09 0x04200780 0x10004e01 0x0023c780 - 0x60004809 0x00208780 0x1000ce01 0x0423c780 - 0x40050021 0x00000780 0x60040221 0x00020780 - 0x30101021 0xc4100780 0x60040001 0x00020780 - 0x20000001 0x04014780 0x30070009 0xc4100780 - 0x30060001 0xc4100780 0x20008400 0x2100e800 - 0x20000801 0x04000780 0x20008001 0x00000007 - 0xd00e0019 0xa0c00780 0x307c0601 0x6c0087e2 - 0xa00001fd 0x0c0147d8 0xa00ab003 0x00000000 - 0x100ab003 0x00002100 0xa0004c09 0x04200780 - 0x10004e01 0x0023c780 0x60004809 0x00208780 - 0x1000ce01 0x0423c780 0x40050021 0x00000780 - 0x60040221 0x00020780 0x30101021 0xc4100780 - 0x60040001 0x00020780 0x20000001 0x04014780 - 0x30070009 0xc4100780 0x30060001 0xc4100780 - 0x20008400 0x2100e800 0x20088001 0x00000003 - 0xd00e001d 0xa0c00780 0x307c0dfd 0x6c0087ea - 0xa0000c01 0x44066500 0x30170001 0xec102500 - 0x31000001 0x04426500 0x10001001 0x2440e280 - 0xd0060009 0x0402c780 0x307c05fd 0x6c0087e8 - 0xa0000409 0x44066500 0x30170409 0xec102500 - 0x31000409 0x04426500 0x10001009 0x2440e280 - 0x30020001 0x8c000780 0x00000809 0xc0000780 - 0x30218001 0x00000003 0xd802600d 0x20000780 - 0x08009601 0xe4200780 0x3c00de01 0x8c200780 - 0x08009601 0xe4200780 0x3c00ce01 0x8c200780 - 0x08009601 0xe4200780 0x3c00c601 0x8c200780 - 0x08009601 0xe4200780 0x3c00c201 0x8c200780 - 0x08009601 0xe4200780 0x3c00c001 0x8c200780 - 0x08009601 0xe4200780 0x100d6003 0x00001100 - 0xa0004c09 0x04200780 0x10004e01 0x0023c780 - 0x60004809 0x00208780 0x1000ce01 0x0423c780 - 0x40050019 0x00000780 0x60040219 0x00018780 - 0x30100c19 0xc4100780 0x60040001 0x00018780 - 0x20000001 0x04014780 0x30070009 0xc4100780 - 0x30060001 0xc4100780 0x20000401 0x04000780 - 0xd0025809 0x20000780 0x2100e808 0x1900e000 - 0x200c8409 0x00000003 0xd00e0401 0xa0c00780 - 0xf0000001 0xe0000002 0x861ffe03 0x00000000 - 0x20018a15 0x00000003 0x3005cffd 0x6c2147d8 - 0xd4000805 0x20000780 0x10038003 0x00001280 - 0xf0000001 0xe0000001 + 0xd0016811 0x20000780 0xd403280d 0x20000780 + 0x1000c005 0x0423c784 0xbc00c015 0x00204780 + 0xb08201fd 0x605107e8 0x10000005 0x0403c780 + 0xa0000a15 0xe4004780 0xc0880a15 0x00402680 + 0xc0880205 0x00402680 0x90000204 0xc0010a18 + 0xc806d815 0x00200780 0x1000f815 0x0403d280 + 0x300209fd 0x6c0147d8 0xb0000c05 0x00014780 + 0x10000a05 0x0403d280 0x0000060d 0xc0000780 + 0xe1060c15 0x0440c780 0xbd017a04 0xc0050000 + 0xa00bb003 0x00000000 0x0c001a01 0xe4204780 + 0x100bb003 0x00000100 0xa0000205 0xc4104780 + 0xc0000205 0x04700003 0xa0000215 0x8c0047d0 + 0x2000d605 0x04210780 0xa0000a15 0x44065680 + 0x30170a15 0xec101680 0x31000a15 0x04429680 + 0x10001215 0x2440d100 0x30020819 0x6c0187d0 + 0x30148a15 0x00000003 0xd0840c19 0x04400780 + 0x0000060d 0xc0000780 0x40060a15 0x00018780 + 0xdc027011 0x20000780 0x0c009a01 0xe4214780 + 0x3005de15 0x8c200784 0x0c009a01 0xe4214780 + 0x3005ce15 0x8c200784 0x0c009a01 0xe4214780 + 0x3005c615 0x8c200784 0x0c009a01 0xe4214780 + 0x3005c215 0x8c200784 0x0c009a01 0xe4214780 + 0x3005c015 0x8c200784 0x0c009a01 0xe4214780 + 0xd002680d 0x20000780 0x3d0fe015 0x00000003 + 0x30850a15 0xac400780 0x10018019 0x00000003 + 0x307c0a15 0x8c000780 0x30050c19 0xc4000780 + 0xa0000c19 0x44014780 0xc806da19 0x00200780 + 0xa0000c19 0xac004780 0x30860c19 0xac400780 + 0xa0091003 0x00000000 0x30870c19 0x8c400780 + 0x10091003 0x00001100 0x3007021d 0xc4100780 + 0x30060221 0xc4100780 0x20088e1c 0x2107e81c + 0x2000061d 0x0401c780 0x20008e1d 0x00000007 + 0xd00e0e19 0xa0c00780 0x307c041d 0x6c0087e2 + 0xa0000ffd 0x0c0147d8 0x3007021d 0xc4102680 + 0x30060221 0xc4102680 0x20000e1d 0x04022680 + 0x2000c81d 0x0421e680 0x21000e1d 0x0442e680 + 0xd00e0e15 0xa0c02680 0x307c0dfd 0x6c0087e8 + 0xa0000c15 0x44066500 0x30170a15 0xec102500 + 0x31000a15 0x0442a500 0x10001215 0x2440e280 + 0xd0060019 0x0402c780 0x307c0dfd 0x6c0087e8 + 0xa0000c19 0x44066500 0x30170c19 0xec102500 + 0x31000c19 0x0442a500 0x10001219 0x2440e280 + 0x30060a15 0x8c000780 0x00000609 0xc0000780 + 0x30218a15 0x00000003 0xd802700d 0x20000780 + 0x08009a01 0xe4214780 0x3c05de15 0x8c200780 + 0x08009a01 0xe4214780 0x3c05ce15 0x8c200780 + 0x08009a01 0xe4214780 0x3c05c615 0x8c200780 + 0x08009a01 0xe4214780 0x3c05c215 0x8c200780 + 0x08009a01 0xe4214780 0x3c05c015 0x8c200780 + 0x08009a01 0xe4214780 0x100bb003 0x00001100 + 0x30070215 0xc4100780 0x30060205 0xc4100780 + 0x20000a05 0x04004780 0xd0026809 0x20000780 + 0x2101e814 0x1900e004 0x200c8a15 0x00000003 + 0xd00e0a05 0xa0c00780 0xf0000001 0xe0000002 + 0x861ffe03 0x00000000 0x20018811 0x00000003 + 0x3004cffd 0x6c2147d8 0xd4000805 0x20000780 + 0x10038003 0x00001280 0xf0000001 0xe0000001 } } code {