From 4a47615f7c9b2652950b65efb30e2d1158986a9b Mon Sep 17 00:00:00 2001 From: chudov Date: Fri, 15 Oct 2010 19:56:36 +0000 Subject: [PATCH] opencl flac encoder --- CUETools.Codecs.FLACCL/FLACCLWriter.cs | 107 ++++++++---- CUETools.Codecs.FLACCL/flac.cl | 217 ++++++++++++++++++------- CUETools.FLACCL.cmd/Program.cs | 21 +-- 3 files changed, 245 insertions(+), 100 deletions(-) diff --git a/CUETools.Codecs.FLACCL/FLACCLWriter.cs b/CUETools.Codecs.FLACCL/FLACCLWriter.cs index c525c71..0dc7f60 100644 --- a/CUETools.Codecs.FLACCL/FLACCLWriter.cs +++ b/CUETools.Codecs.FLACCL/FLACCLWriter.cs @@ -53,6 +53,9 @@ namespace CUETools.Codecs.FLACCL [SRDescription(typeof(Properties.Resources), "DescriptionGroupSize")] public int GroupSize { get; set; } + [SRDescription(typeof(Properties.Resources), "DescriptionDefines")] + public string Defines { get; set; } + int cpu_threads = 1; [DefaultValue(1)] [SRDescription(typeof(Properties.Resources), "DescriptionCPUThreads")] @@ -474,6 +477,12 @@ namespace CUETools.Codecs.FLACCL } } + public bool DoConstant + { + get { return eparams.do_constant; } + set { eparams.do_constant = value; } + } + public int MinPartitionOrder { get { return eparams.min_partition_order; } @@ -1173,12 +1182,8 @@ namespace CUETools.Codecs.FLACCL return; int max_porder = get_max_p_order(eparams.max_partition_order, task.frameSize, eparams.max_prediction_order); - int calcPartitionPartSize = task.frameSize >> max_porder; - while (calcPartitionPartSize < 16 && max_porder > 0) - { - calcPartitionPartSize <<= 1; + while ((task.frameSize >> max_porder) < 16 && max_porder > 0) max_porder--; - } if (channels != 2) throw new Exception("channels != 2"); // need to Enqueue cudaChannelDecorr for each channel Kernel cudaChannelDecorr = channels == 2 ? (channelsCount == 4 ? task.cudaStereoDecorr : task.cudaChannelDecorr2) : null;// task.cudaChannelDecorr; @@ -1212,12 +1217,6 @@ namespace CUETools.Codecs.FLACCL task.cudaEncodeResidual.SetArg(1, task.cudaSamples); task.cudaEncodeResidual.SetArg(2, task.cudaBestResidualTasks); - task.cudaCalcPartition.SetArg(0, task.cudaPartitions); - task.cudaCalcPartition.SetArg(1, task.cudaResidual); - task.cudaCalcPartition.SetArg(2, task.cudaBestResidualTasks); - task.cudaCalcPartition.SetArg(3, max_porder); - task.cudaCalcPartition.SetArg(4, calcPartitionPartSize); - task.cudaSumPartition.SetArg(0, task.cudaPartitions); task.cudaSumPartition.SetArg(1, max_porder); @@ -1276,10 +1275,19 @@ namespace CUETools.Codecs.FLACCL task.openCLCQ.EnqueueNDRangeKernel(task.cudaCopyBestMethod, 2, null, new int[] { 64, channels * task.frameCount }, new int[] { 64, 1 }); if (_settings.GPUOnly) { - task.openCLCQ.EnqueueBarrier(); - task.openCLCQ.EnqueueNDRangeKernel(task.cudaEncodeResidual, 1, null, new int[] { task.groupSize * channels * task.frameCount }, new int[] { task.groupSize }); - task.openCLCQ.EnqueueBarrier(); - task.openCLCQ.EnqueueNDRangeKernel(task.cudaCalcPartition, 2, null, new int[] { task.groupSize * (1 << max_porder), channels * task.frameCount }, new int[] { task.groupSize, 1 }); + task.max_porder = max_porder; + if (task.frameSize >> max_porder == 16) + { + task.openCLCQ.EnqueueBarrier(); + task.EnqueueCalcPartition16(channels); + } + else + { + task.openCLCQ.EnqueueBarrier(); + task.openCLCQ.EnqueueNDRangeKernel(task.cudaEncodeResidual, 1, null, new int[] { task.groupSize * channels * task.frameCount }, new int[] { task.groupSize }); + task.openCLCQ.EnqueueBarrier(); + task.EnqueueCalcPartition(channels); + } if (max_porder > 0) { task.openCLCQ.EnqueueBarrier(); @@ -1293,7 +1301,6 @@ namespace CUETools.Codecs.FLACCL task.openCLCQ.EnqueueBarrier(); task.openCLCQ.EnqueueReadBuffer(task.cudaResidual, false, 0, sizeof(int) * MAX_BLOCKSIZE * channels, task.residualBufferPtr.AddrOfPinnedObject()); task.openCLCQ.EnqueueReadBuffer(task.cudaBestRiceParams, false, 0, sizeof(int) * (1 << max_porder) * channels * task.frameCount, task.bestRiceParamsPtr.AddrOfPinnedObject()); - task.max_porder = max_porder; } task.openCLCQ.EnqueueBarrier(); task.openCLCQ.EnqueueReadBuffer(task.cudaBestResidualTasks, false, 0, sizeof(FLACCLSubframeTask) * channels * task.frameCount, task.bestResidualTasksPtr.AddrOfPinnedObject()); @@ -1575,11 +1582,12 @@ namespace CUETools.Codecs.FLACCL // and serve as a convenient way to pass configuration information to the compilation process OCLMan.Defines = "#define MAX_ORDER " + eparams.max_prediction_order.ToString() + "\n" + - "#define GROUP_SIZE " + groupSize.ToString() + "\n"; + "#define GROUP_SIZE " + groupSize.ToString() + "\n" + + _settings.Defines + "\n"; // The BuildOptions string is passed directly to clBuild and can be used to do debug builds etc OCLMan.BuildOptions = ""; OCLMan.SourcePath = System.IO.Path.GetDirectoryName(GetType().Assembly.Location); - //OCLMan.BinaryPath = ; + OCLMan.BinaryPath = System.IO.Path.Combine(System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "CUE Tools"), "OpenCL"); OCLMan.CreateDefaultContext(0, DeviceType.GPU); openCLContext = OCLMan.Context; @@ -1778,7 +1786,7 @@ namespace CUETools.Codecs.FLACCL public string Path { get { return _path; } } - public static readonly string vendor_string = "FLACCL#.91"; + public static readonly string vendor_string = "FLACCL#0.1"; int select_blocksize(int samplerate, int time_ms) { @@ -2142,41 +2150,48 @@ namespace CUETools.Codecs.FLACCL do_constant = false; do_wasted = false; do_midside = false; + window_function = WindowFunction.Bartlett; orders_per_window = 1; max_partition_order = 4; max_prediction_order = 7; - min_fixed_order = 2; + min_fixed_order = 3; max_fixed_order = 2; break; case 1: + do_constant = false; do_wasted = false; do_midside = false; window_function = WindowFunction.Bartlett; orders_per_window = 1; - max_prediction_order = 12; + min_fixed_order = 2; + max_fixed_order = 2; + max_prediction_order = 7; max_partition_order = 4; break; case 2: do_constant = false; + do_midside = false; window_function = WindowFunction.Bartlett; - min_fixed_order = 3; + min_fixed_order = 2; max_fixed_order = 2; orders_per_window = 1; - max_prediction_order = 7; + max_prediction_order = 8; max_partition_order = 4; break; case 3: window_function = WindowFunction.Bartlett; + do_constant = false; min_fixed_order = 2; max_fixed_order = 2; - orders_per_window = 6; - max_prediction_order = 7; + orders_per_window = 1; + max_prediction_order = 8; max_partition_order = 4; break; case 4: + do_constant = false; min_fixed_order = 2; max_fixed_order = 2; - orders_per_window = 3; + orders_per_window = 1; max_prediction_order = 8; max_partition_order = 4; break; @@ -2184,18 +2199,21 @@ namespace CUETools.Codecs.FLACCL do_constant = false; min_fixed_order = 2; max_fixed_order = 2; - orders_per_window = 1; + orders_per_window = 2; + max_prediction_order = 8; break; case 6: + do_constant = false; + min_fixed_order = 2; + max_fixed_order = 2; + orders_per_window = 1; + break; + case 7: + do_constant = false; min_fixed_order = 2; max_fixed_order = 2; orders_per_window = 3; break; - case 7: - min_fixed_order = 2; - max_fixed_order = 2; - orders_per_window = 7; - break; case 8: orders_per_window = 12; break; @@ -2264,6 +2282,7 @@ namespace CUETools.Codecs.FLACCL public Kernel cudaCopyBestMethodStereo; public Kernel cudaEncodeResidual; public Kernel cudaCalcPartition; + public Kernel cudaCalcPartition16; public Kernel cudaSumPartition; public Kernel cudaFindRiceParameter; public Kernel cudaFindPartitionOrder; @@ -2355,6 +2374,7 @@ namespace CUETools.Codecs.FLACCL cudaCopyBestMethodStereo = openCLProgram.CreateKernel("cudaCopyBestMethodStereo"); cudaEncodeResidual = openCLProgram.CreateKernel("cudaEncodeResidual"); cudaCalcPartition = openCLProgram.CreateKernel("cudaCalcPartition"); + cudaCalcPartition16 = openCLProgram.CreateKernel("cudaCalcPartition16"); cudaSumPartition = openCLProgram.CreateKernel("cudaSumPartition"); cudaFindRiceParameter = openCLProgram.CreateKernel("cudaFindRiceParameter"); cudaFindPartitionOrder = openCLProgram.CreateKernel("cudaFindPartitionOrder"); @@ -2398,6 +2418,7 @@ namespace CUETools.Codecs.FLACCL cudaCopyBestMethodStereo.Dispose(); cudaEncodeResidual.Dispose(); cudaCalcPartition.Dispose(); + cudaCalcPartition16.Dispose(); cudaSumPartition.Dispose(); cudaFindRiceParameter.Dispose(); cudaFindPartitionOrder.Dispose(); @@ -2464,6 +2485,28 @@ namespace CUETools.Codecs.FLACCL openCLCQ.EnqueueNDRangeKernel(cudaChooseBestMethod, 2, null, new int[] { 32, channelsCount * frameCount }, new int[] { 32, 1 }); } + public void EnqueueCalcPartition16(int channels) + { + cudaCalcPartition16.SetArg(0, cudaPartitions); + cudaCalcPartition16.SetArg(1, cudaResidual); + cudaCalcPartition16.SetArg(2, cudaSamples); + cudaCalcPartition16.SetArg(3, cudaBestResidualTasks); + cudaCalcPartition16.SetArg(4, max_porder); + + openCLCQ.EnqueueNDRangeKernel(cudaCalcPartition16, 2, null, new int[] { groupSize, channels * frameCount }, new int[] { groupSize, 1 }); + } + + public void EnqueueCalcPartition(int channels) + { + cudaCalcPartition.SetArg(0, cudaPartitions); + cudaCalcPartition.SetArg(1, cudaResidual); + cudaCalcPartition.SetArg(2, cudaBestResidualTasks); + cudaCalcPartition.SetArg(3, max_porder); + cudaCalcPartition.SetArg(4, frameSize >> max_porder); + + openCLCQ.EnqueueNDRangeKernel(cudaCalcPartition, 2, null, new int[] { groupSize * (1 << max_porder), channels * frameCount }, new int[] { groupSize, 1 }); + } + public unsafe FLACCLSubframeTask* ResidualTasks { get diff --git a/CUETools.Codecs.FLACCL/flac.cl b/CUETools.Codecs.FLACCL/flac.cl index d8e09d7..b826560 100644 --- a/CUETools.Codecs.FLACCL/flac.cl +++ b/CUETools.Codecs.FLACCL/flac.cl @@ -449,7 +449,7 @@ void cudaQuantizeLPC( } } -#define DONT_BEACCURATE +#define BEACCURATE __kernel /*__attribute__(( vec_type_hint (int4)))*/ __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) void cudaEstimateResidual( @@ -481,24 +481,35 @@ void cudaEstimateResidual( if (tid < GROUP_SIZE / 16) len[tid] = 0; #else - float res = 0.0f; + long res = 0; +#endif + data[tid] = 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + __local int4 * cptr = (__local int4 *)&task.coefs[0]; + int4 cptr0 = cptr[0]; +#if MAX_ORDER > 4 + int4 cptr1 = cptr[1]; +#if MAX_ORDER > 8 + int4 cptr2 = cptr[2]; +#endif #endif - data[tid] = tid < bs ? samples[task.data.samplesOffs + tid] >> task.data.wbits : 0; for (int pos = 0; pos < bs; pos += GROUP_SIZE) { // fetch samples - int nextData = pos + tid + GROUP_SIZE < bs ? samples[task.data.samplesOffs + pos + tid + GROUP_SIZE] >> task.data.wbits : 0; + int offs = pos + tid; + int nextData = offs < bs ? samples[task.data.samplesOffs + offs] >> task.data.wbits : 0; data[tid + GROUP_SIZE] = nextData; barrier(CLK_LOCAL_MEM_FENCE); // compute residual - __local int4 * dptr = (__local int4 *)&data[tid]; - __local int4 * cptr = (__local int4 *)&task.coefs[0]; - int4 sum = dptr[0] * cptr[0] + __local int4 * dptr = (__local int4 *)&data[tid + GROUP_SIZE - ro]; + int4 sum = dptr[0] * cptr0 #if MAX_ORDER > 4 - + dptr[1] * cptr[1] + + dptr[1] * cptr1 #if MAX_ORDER > 8 - + dptr[2] * cptr[2] + + dptr[2] * cptr2 #if MAX_ORDER > 12 + dptr[3] * cptr[3] #if MAX_ORDER > 16 @@ -512,23 +523,23 @@ void cudaEstimateResidual( #endif ; - int t = select(0, data[tid + ro] - ((sum.x + sum.y + sum.z + sum.w) >> task.data.shift), pos + tid + ro < bs); + int t = select(0, data[tid + GROUP_SIZE] - ((sum.x + sum.y + sum.z + sum.w) >> task.data.shift), offs >= ro && offs < bs); #ifdef BEACCURATE - residual[tid] = min((t << 1) ^ (t >> 31), 0x7fffff); + t = clamp(t, -0x7fffff, 0x7fffff); + residual[tid] = (t << 1) ^ (t >> 31); #else - res += fabs(t); + res += (t << 1) ^ (t >> 31); #endif - barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); #ifdef BEACCURATE if (tid < GROUP_SIZE / 16) { - __local int4 * chunk = ((__local int4 *)residual) + tid * 4; + __local int4 * chunk = ((__local int4 *)residual) + (tid << 2); int4 sum = chunk[0] + chunk[1] + chunk[2] + chunk[3]; int res = sum.x + sum.y + sum.z + sum.w; - int k = clamp(clz(16) - clz(res), 0, 14); - len[tid] += 16 * k + (res >> k); - k = clamp(clz(16) - clz(res), 0, 14); + int k = clamp(27 - clz(res), 0, 14); // 27 - clz(res) == clz(16) - clz(res) == log2(res / 16) + len[tid] += (k << 4) + (res >> k); } #endif @@ -557,7 +568,7 @@ void cudaEstimateResidual( if (tid == 0) { int residualLen = (bs - ro); - float sum = residual[0] * 2;// + residualLen / 2; + float sum = residual[0];// + residualLen / 2; //int k = clamp(convert_int_rtn(log2((sum + 0.000001f) / (residualLen + 0.000001f))), 0, 14); int k; frexp((sum + 0.000001f) / residualLen, &k); @@ -608,7 +619,7 @@ void cudaChooseBestMethod( min(obits * task.blocksize, task.type == Fixed ? task.residualOrder * obits + 6 + (4 * 1/2) + partLen : task.type == LPC ? task.residualOrder * obits + 4 + 5 + task.residualOrder * task.cbits + 6 + (4 * 1/2)/* << porder */ + partLen : - task.type == Constant ? obits * (1 + task.blocksize * (partLen != 0)) : + task.type == Constant ? obits * select(1, task.blocksize, partLen != task.blocksize - task.residualOrder) : obits * task.blocksize); } @@ -721,21 +732,50 @@ void cudaEncodeResidual( int bs = task.data.blocksize; int ro = task.data.residualOrder; - data[tid] = tid < bs ? samples[task.data.samplesOffs + tid] >> task.data.wbits : 0; + if (tid < 32 && tid >= ro) + task.coefs[tid] = 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + __local int4 * cptr = (__local int4 *)&task.coefs[0]; + int4 cptr0 = cptr[0]; +#if MAX_ORDER > 4 + int4 cptr1 = cptr[1]; +#if MAX_ORDER > 8 + int4 cptr2 = cptr[2]; +#endif +#endif + + data[tid] = 0; for (int pos = 0; pos < bs; pos += GROUP_SIZE) { // fetch samples - float nextData = pos + tid + GROUP_SIZE < bs ? samples[task.data.samplesOffs + pos + tid + GROUP_SIZE] >> task.data.wbits : 0; + int off = pos + tid; + int nextData = off < bs ? samples[task.data.samplesOffs + off] >> task.data.wbits : 0; data[tid + GROUP_SIZE] = nextData; barrier(CLK_LOCAL_MEM_FENCE); // compute residual - int sum = 0; - for (int c = 0; c < ro; c++) - sum += data[tid + c] * task.coefs[c]; - sum = data[tid + ro] - (sum >> task.data.shift); - if (pos + tid + ro < bs) - output[task.data.residualOffs + pos + tid + ro] = sum; + __local int4 * dptr = (__local int4 *)&data[tid + GROUP_SIZE - ro]; + int4 sum = dptr[0] * cptr0 +#if MAX_ORDER > 4 + + dptr[1] * cptr1 +#if MAX_ORDER > 8 + + dptr[2] * cptr2 +#if MAX_ORDER > 12 + + dptr[3] * cptr[3] +#if MAX_ORDER > 16 + + dptr[4] * cptr[4] + + dptr[5] * cptr[5] + + dptr[6] * cptr[6] + + dptr[7] * cptr[7] +#endif +#endif +#endif +#endif + ; + if (off >= ro && off < bs) + output[task.data.residualOffs + off] = data[tid + GROUP_SIZE] - ((sum.x + sum.y + sum.z + sum.w) >> task.data.shift); barrier(CLK_LOCAL_MEM_FENCE); data[tid] = nextData; @@ -795,6 +835,98 @@ void cudaCalcPartition( } } +// get_group_id(1) == task index +__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +void cudaCalcPartition16( + __global int *partition_lengths, + __global int *residual, + __global int *samples, + __global FLACCLSubframeTask *tasks, + int max_porder // <= 8 + ) +{ + __local FLACCLSubframeTask task; + __local int data[GROUP_SIZE * 2]; + __local int res[GROUP_SIZE]; + + const int tid = get_local_id(0); + if (tid < sizeof(task) / sizeof(int)) + ((__local int*)&task)[tid] = ((__global int*)(&tasks[get_group_id(1)]))[tid]; + barrier(CLK_LOCAL_MEM_FENCE); + + int bs = task.data.blocksize; + int ro = task.data.residualOrder; + + if (tid >= ro && tid < 32) + task.coefs[tid] = 0; + + int k = tid % 16; + int x = tid / 16; + + barrier(CLK_LOCAL_MEM_FENCE); + + __local int4 * cptr = (__local int4 *)&task.coefs[0]; + int4 cptr0 = cptr[0]; +#if MAX_ORDER > 4 + int4 cptr1 = cptr[1]; +#if MAX_ORDER > 8 + int4 cptr2 = cptr[2]; +#endif +#endif + + data[tid] = 0; + for (int pos = 0; pos < bs; pos += GROUP_SIZE) + { + int offs = pos + tid; + // fetch samples + int nextData = offs < bs ? samples[task.data.samplesOffs + offs] >> task.data.wbits : 0; + data[tid + GROUP_SIZE] = nextData; + barrier(CLK_LOCAL_MEM_FENCE); + + // compute residual + __local int4 * dptr = (__local int4 *)&data[tid + GROUP_SIZE - ro]; + int4 sum = dptr[0] * cptr0 +#if MAX_ORDER > 4 + + dptr[1] * cptr1 +#if MAX_ORDER > 8 + + dptr[2] * cptr2 +#if MAX_ORDER > 12 + + dptr[3] * cptr[3] +#if MAX_ORDER > 16 + + dptr[4] * cptr[4] + + dptr[5] * cptr[5] + + dptr[6] * cptr[6] + + dptr[7] * cptr[7] +#endif +#endif +#endif +#endif + ; + int s = select(0, nextData - ((sum.x + sum.y + sum.z + sum.w) >> task.data.shift), offs >= ro && offs < bs); + + // output residual + if (offs < bs) + residual[task.data.residualOffs + offs] = s; + + //int s = select(0, residual[task.data.residualOffs + offs], offs >= ro && offs < bs); + + s = clamp(s, -0x7fffff, 0x7fffff); + // convert to unsigned + res[tid] = (s << 1) ^ (s >> 31); + barrier(CLK_LOCAL_MEM_FENCE); + data[tid] = nextData; + + // calc number of unary bits for each residual sample with each rice paramater + __local int4 * chunk = (__local int4 *)&res[x << 4]; + sum = (chunk[0] >> k) + (chunk[1] >> k) + (chunk[2] >> k) + (chunk[3] >> k); + s = sum.x + sum.y + sum.z + sum.w; + + const int lpos = (15 << (max_porder + 1)) * get_group_id(1) + (k << (max_porder + 1)) + offs / 16; + if (k <= 14) + partition_lengths[lpos] = min(0x7fffff, s) + (16 - select(0, ro, offs < 16)) * (k + 1); + } +} + // Sums partition lengths for a certain k == get_group_id(0) // Requires 128 threads // get_group_id(0) == k @@ -949,36 +1081,5 @@ void cudaFindPartitionOrder( if (offs + get_local_id(0) < (1 << porder)) best_rice_parameters[(get_group_id(0) << max_porder) + offs + get_local_id(0)] = rice_parameters[pos - (2 << porder) + offs + get_local_id(0)]; // FIXME: should be bytes? - // if (get_local_id(0) < (1 << porder)) - //shared.tmp[get_local_id(0)] = rice_parameters[pos - (2 << porder) + get_local_id(0)]; - // barrier(CLK_LOCAL_MEM_FENCE); - // if (get_local_id(0) < max(1, (1 << porder) >> 2)) - // { - //char4 ch; - //ch.x = shared.tmp[(get_local_id(0) << 2)]; - //ch.y = shared.tmp[(get_local_id(0) << 2) + 1]; - //ch.z = shared.tmp[(get_local_id(0) << 2) + 2]; - //ch.w = shared.tmp[(get_local_id(0) << 2) + 3]; - //shared.ch[get_local_id(0)] = ch - // } - // barrier(CLK_LOCAL_MEM_FENCE); - // if (get_local_id(0) < max(1, (1 << porder) >> 2)) - //best_rice_parameters[(get_group_id(1) << max_porder) + get_local_id(0)] = shared.ch[get_local_id(0)]; } - -//#endif -// -//#if 0 -// if (get_local_id(0) < order) -// { -// for (int i = 0; i < order; i++) -// if (get_local_id(0) >= i) -// sum[get_local_id(0) - i] += coefs[get_local_id(0)] * sample[order - i - 1]; -// fot (int i = order; i < blocksize; i++) -// { -// if (!get_local_id(0)) sample[order + i] = s = residual[order + i] + (sum[order + i] >> shift); -// sum[get_local_id(0) + i + 1] += coefs[get_local_id(0)] * s; -// } -// } -//#endif #endif diff --git a/CUETools.FLACCL.cmd/Program.cs b/CUETools.FLACCL.cmd/Program.cs index 87bdcea..00a8d37 100644 --- a/CUETools.FLACCL.cmd/Program.cs +++ b/CUETools.FLACCL.cmd/Program.cs @@ -83,6 +83,7 @@ namespace CUETools.FLACCL.cmd bool do_seektable = true; bool buffered = false; bool ok = true; + int intarg; for (int arg = 0; arg < args.Length; arg++) { @@ -108,12 +109,10 @@ namespace CUETools.FLACCL.cmd ok = (++arg < args.Length) && int.TryParse(args[arg], out val); settings.CPUThreads = val; } - else if (args[arg] == "--group-size") - { - int val = settings.GroupSize; - ok = (++arg < args.Length) && int.TryParse(args[arg], out val); - settings.GroupSize = val; - } + else if (args[arg] == "--group-size" && ++arg < args.Length && int.TryParse(args[arg], out intarg)) + settings.GroupSize = intarg; + else if (args[arg] == "--define" && arg + 2 < args.Length) + settings.Defines += "#define " + args[++arg] + " " + args[++arg] + "\n"; else if ((args[arg] == "-o" || args[arg] == "--output") && ++arg < args.Length) output_file = args[arg]; else if ((args[arg] == "-s" || args[arg] == "--stereo") && ++arg < args.Length) @@ -167,7 +166,7 @@ namespace CUETools.FLACCL.cmd } if (!quiet) { - Console.WriteLine("{0}, Copyright (C) 2009 Gregory S. Chudov.", FLACCLWriter.vendor_string); + Console.WriteLine("{0}, Copyright (C) 2010 Gregory S. Chudov.", FLACCLWriter.vendor_string); Console.WriteLine("This is free software under the GNU GPLv3+ license; There is NO WARRANTY, to"); Console.WriteLine("the extent permitted by law. for details."); } @@ -317,19 +316,21 @@ namespace CUETools.FLACCL.cmd if (debug) { Console.SetOut(stdout); - Console.Out.WriteLine("{0}\t{1}\t{2}\t{3}\t{4} ({5})\t{6} ({7})\t{8}..{9}\t{10}\t{11}", + Console.Out.WriteLine("{0}\t{1}\t{2}\t{3}\t{4} ({5})\t{6}/{7}+{12}{13}\t{8}..{9}\t{10}\t{11}", encoder.TotalSize, encoder.UserProcessorTime.TotalSeconds > 0 ? encoder.UserProcessorTime.TotalSeconds : totalElapsed.TotalSeconds, encoder.StereoMethod.ToString().PadRight(15), encoder.WindowFunction.ToString().PadRight(15), encoder.MaxPartitionOrder, settings.GPUOnly ? "GPU" : "CPU", - encoder.MaxLPCOrder, encoder.OrdersPerWindow, + encoder.MaxLPCOrder, encoder.MinPrecisionSearch, encoder.MaxPrecisionSearch, encoder.BlockSize, - encoder.VBRMode + encoder.VBRMode, + encoder.MaxFixedOrder - encoder.MinFixedOrder + 1, + encoder.DoConstant ? "c" : "" ); } return 0;