diff --git a/CUETools.Codecs.FLACCL/FLACCLWriter.cs b/CUETools.Codecs.FLACCL/FLACCLWriter.cs index 4638f6b..fd6e9c2 100644 --- a/CUETools.Codecs.FLACCL/FLACCLWriter.cs +++ b/CUETools.Codecs.FLACCL/FLACCLWriter.cs @@ -2023,7 +2023,7 @@ namespace CUETools.Codecs.FLACCL //// return blocksize >> 1; ////} - for (int i = 8; i < Flake.flac_blocksizes.Length; i++) + for (int i = 8; i < Flake.flac_blocksizes.Length - 1; i++) if (target >= Flake.flac_blocksizes[i] && Flake.flac_blocksizes[i] > blocksize) { blocksize = Flake.flac_blocksizes[i]; diff --git a/CUETools.Codecs.FLACCL/flac.cl b/CUETools.Codecs.FLACCL/flac.cl index 433f198..bff06e7 100644 --- a/CUETools.Codecs.FLACCL/flac.cl +++ b/CUETools.Codecs.FLACCL/flac.cl @@ -1118,8 +1118,10 @@ void clEstimateResidual( // calculate rice partition bit length for every 32 samples barrier(CLK_LOCAL_MEM_FENCE); - // Bug: if (MAX_BLOCKSIZE >> (ESTPARTLOG + 1)) > GROUP_SIZE - uint pl = get_local_id(0) < (MAX_BLOCKSIZE >> (ESTPARTLOG + 1)) ? psum[tid * 2] + psum[tid * 2 + 1] : 0; +#if (MAX_BLOCKSIZE >> (ESTPARTLOG + 1)) > GROUP_SIZE +#error MAX_BLOCKSIZE is too large for this GROUP_SIZE +#endif + uint pl = tid < (MAX_BLOCKSIZE >> (ESTPARTLOG + 1)) ? psum[tid * 2] + psum[tid * 2 + 1] : 0; barrier(CLK_LOCAL_MEM_FENCE); // for (int pos = 0; pos < (MAX_BLOCKSIZE >> ESTPARTLOG) / 2; pos += GROUP_SIZE) // { @@ -1131,7 +1133,7 @@ void clEstimateResidual( // psum[offs] = pl; // } int k = clamp(31 - (int)clz(pl) - (ESTPARTLOG + 1), 0, MAX_RICE_PARAM); // 26 - clz(res) == clz(32) - clz(res) == log2(res / 32) - if (tid < (MAX_BLOCKSIZE >> ESTPARTLOG) / 2) + if (tid < MAX_BLOCKSIZE >> (ESTPARTLOG + 1)) psum[tid] = (k << (ESTPARTLOG + 1)) + (pl >> k); barrier(CLK_LOCAL_MEM_FENCE); for (int l = MAX_BLOCKSIZE >> (ESTPARTLOG + 2); l > 0; l >>= 1) diff --git a/CUETools.FLACCL.cmd/Program.cs b/CUETools.FLACCL.cmd/Program.cs index ce73547..36fdd82 100644 --- a/CUETools.FLACCL.cmd/Program.cs +++ b/CUETools.FLACCL.cmd/Program.cs @@ -48,7 +48,7 @@ namespace CUETools.FLACCL.cmd Console.WriteLine("OpenCL Options:"); Console.WriteLine(); Console.WriteLine(" --opencl-type CPU or GPU, default GPU"); - Console.WriteLine(" --opencl-platform 'ATI Stream', 'NVIDIA CUDA', 'Intel OpenCL' etc"); + Console.WriteLine(" --opencl-platform \"ATI Stream\", \"NVIDIA CUDA\", \"Intel(R) OpenCL\" etc"); Console.WriteLine(" --group-size # Set GPU workgroup size (64,128,256)"); Console.WriteLine(" --task-size # Set number of frames per multiprocessor, default 8"); Console.WriteLine(" --slow-gpu Some encoding stages are done on CPU");