stability issues

This commit is contained in:
chudov
2010-11-25 09:10:35 +00:00
parent ce99355a43
commit 8857259a5f
4 changed files with 109 additions and 13 deletions

View File

@@ -72,7 +72,7 @@ namespace CUETools.Codecs.FLACCL
public int GroupSize { get; set; }
[DefaultValue(32)]
[SRDescription(typeof(Properties.Resources), "DescriptionTashSize")]
[SRDescription(typeof(Properties.Resources), "DescriptionTaskSize")]
public int TaskSize { get; set; }
[SRDescription(typeof(Properties.Resources), "DescriptionDefines")]
@@ -845,18 +845,17 @@ namespace CUETools.Codecs.FLACCL
if (_settings.GPUOnly && _settings.DoRice)
{
int len = task.BestResidualTasks[index].size - task.BestResidualTasks[index].headerLen;
int pos = task.BestResidualTasks[index].encodingOffset;
if (task.BestResidualTasks[index].size != (int)sub.best.size)
throw new Exception("Encoding offset mismatch");
if (task.BestResidualTasks[index].headerLen != offs0 + 6)
throw new Exception("Encoding offset mismatch");
if (task.BestResidualTasks[index].encodingOffset != frame.writer.BitLength)
if (pos % 8 != frame.writer.BitLength % 8)
throw new Exception("Encoding offset mismatch");
int len = task.BestResidualTasks[index].size - task.BestResidualTasks[index].headerLen;
//Console.WriteLine("{0:x} => {1:x}", _totalSize + frame.writer.BitLength / 8, _totalSize + (frame.writer.BitLength + len) / 8);
// task.BestResidualTasks[index].headerLen
frame.writer.writeints(len, (byte*)task.clRiceOutputPtr);
if (task.BestResidualTasks[index].encodingOffset + len != frame.writer.BitLength)
throw new Exception("Encoding offset mismatch");
frame.writer.writeints(len, pos, (byte*)task.clRiceOutputPtr);
}
else
{
@@ -1212,8 +1211,15 @@ namespace CUETools.Codecs.FLACCL
}
#endif
if (((csum << task.frame.subframes[ch].obits) >= 1UL << 32 && !_settings.DoRice) || !_settings.GPUOnly)
if (((csum << task.frame.subframes[ch].obits) >= 1UL << 32) || !_settings.GPUOnly)
{
if (_settings.GPUOnly && _settings.DoRice)
#if DEBUG
// throw new Exception("DoRice failed");
break;
#else
break;
#endif
if (!unpacked) unpack_samples(task, task.frameSize); unpacked = true;
if ((csum << task.frame.subframes[ch].obits) >= 1UL << 32)
lpc.encode_residual_long(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples, task.frame.blocksize, task.frame.subframes[ch].best.order, coefs, task.frame.subframes[ch].best.shift);
@@ -1227,7 +1233,7 @@ namespace CUETools.Codecs.FLACCL
RiceContext rc1 = task.frame.subframes[ch].best.rc;
task.frame.subframes[ch].best.rc = new RiceContext();
#endif
task.frame.subframes[ch].best.size = bits + calc_rice_params(task.frame.subframes[ch].best.rc, pmin, pmax, task.frame.subframes[ch].best.residual, (uint)task.frame.blocksize, (uint)task.frame.subframes[ch].best.order);
task.frame.subframes[ch].best.size = bits + calc_rice_params(task.frame.subframes[ch].best.rc, pmin, pmax, task.frame.subframes[ch].best.residual, (uint)task.frame.blocksize, (uint)task.frame.subframes[ch].best.order);
task.frame.subframes[ch].best.size = measure_subframe(task.frame, task.frame.subframes[ch]);
#if KJHKJH
// check size
@@ -1255,7 +1261,7 @@ namespace CUETools.Codecs.FLACCL
unsafe void select_best_methods(FlacFrame frame, int channelsCount, int iFrame, FLACCLTask task)
{
if (channelsCount == 4 && channels == 2)
if (channelsCount == 4 && channels == 2 && frame.blocksize > 4)
{
if (task.BestResidualTasks[iFrame * 2].channel == 0 && task.BestResidualTasks[iFrame * 2 + 1].channel == 1)
frame.ch_mode = ChannelMode.LeftRight;
@@ -1321,7 +1327,7 @@ namespace CUETools.Codecs.FLACCL
unsafe void estimate_residual(FLACCLTask task, int channelsCount)
{
if (task.frameSize >= 4)
if (task.frameSize > 4)
task.EnqueueKernels();
}
@@ -1605,8 +1611,9 @@ namespace CUETools.Codecs.FLACCL
}
OCLMan.CreateDefaultContext(platformId, (DeviceType)_settings.DeviceType);
bool haveAtom = false;
if (OCLMan.Context.Devices[0].Extensions.Contains("cl_khr_local_int32_extended_atomics"))
_settings.Defines += "#define HAVE_ATOM\n";
haveAtom = true;
else
_settings.GPUOnly = false;
@@ -1618,10 +1625,11 @@ namespace CUETools.Codecs.FLACCL
"#define FLACCL_VERSION \"" + vendor_string + "\"\n" +
(_settings.GPUOnly ? "#define DO_PARTITIONS\n" : "") +
(_settings.DoRice ? "#define DO_RICE\n" : "") +
(haveAtom ? "#define HAVE_ATOM\n" : "") +
#if DEBUG
"#define DEBUG\n" +
#endif
(_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") +
(_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") +
_settings.Defines + "\n";
try

View File

@@ -1,7 +1,7 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:2.0.50727.4200
// Runtime Version:2.0.50727.4206
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
@@ -69,6 +69,33 @@ namespace CUETools.Codecs.FLACCL.Properties {
}
}
/// <summary>
/// Looks up a localized string similar to Additional preprocessor definitions for OpenCL kernel.
/// </summary>
internal static string DescriptionDefines {
get {
return ResourceManager.GetString("DescriptionDefines", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Use CPU or GPU device.
/// </summary>
internal static string DescriptionDeviceType {
get {
return ResourceManager.GetString("DescriptionDeviceType", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Do final encoding stages on GPU (experimental).
/// </summary>
internal static string DescriptionDoRice {
get {
return ResourceManager.GetString("DescriptionDoRice", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Use GPU on all stages.
/// </summary>
@@ -78,6 +105,42 @@ namespace CUETools.Codecs.FLACCL.Properties {
}
}
/// <summary>
/// Looks up a localized string similar to GPU thread block size (64, 128, 256).
/// </summary>
internal static string DescriptionGroupSize {
get {
return ResourceManager.GetString("DescriptionGroupSize", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Device uses host memory (Don&apos;t use).
/// </summary>
internal static string DescriptionMappedMemory {
get {
return ResourceManager.GetString("DescriptionMappedMemory", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to OpenCL platform to use (ATI Stream, NVIDIA OpenCL, Intel OpenCL, etc).
/// </summary>
internal static string DescriptionPlatform {
get {
return ResourceManager.GetString("DescriptionPlatform", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Number of frames processed simultaneously (32, 64).
/// </summary>
internal static string DescriptionTaskSize {
get {
return ResourceManager.GetString("DescriptionTaskSize", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Calculate MD5 hash for audio stream.
/// </summary>

View File

@@ -120,9 +120,30 @@
<data name="DescriptionCPUThreads" xml:space="preserve">
<value>Use additional CPU threads</value>
</data>
<data name="DescriptionDefines" xml:space="preserve">
<value>Additional preprocessor definitions for OpenCL kernel</value>
</data>
<data name="DescriptionDeviceType" xml:space="preserve">
<value>Use CPU or GPU device</value>
</data>
<data name="DescriptionDoRice" xml:space="preserve">
<value>Do final encoding stages on GPU (experimental)</value>
</data>
<data name="DescriptionGPUOnly" xml:space="preserve">
<value>Use GPU on all stages</value>
</data>
<data name="DescriptionGroupSize" xml:space="preserve">
<value>GPU thread block size (64, 128, 256)</value>
</data>
<data name="DescriptionMappedMemory" xml:space="preserve">
<value>Device uses host memory (Don't use)</value>
</data>
<data name="DescriptionPlatform" xml:space="preserve">
<value>OpenCL platform to use (ATI Stream, NVIDIA OpenCL, Intel OpenCL, etc)</value>
</data>
<data name="DescriptionTaskSize" xml:space="preserve">
<value>Number of frames processed simultaneously (32, 64)</value>
</data>
<data name="DoMD5Description" xml:space="preserve">
<value>Calculate MD5 hash for audio stream</value>
</data>

View File

@@ -1723,6 +1723,8 @@ void clCalcOutputOffsets(
{
__global FLACCLSubframeTask* task = tasks + iFrame * channels + ch;
offset += 8 + task->data.wbits;
// Add 64 bits of padding to separate frames: if a header is too small, their outputs could otherwise intersect
offset += 64;
task->data.encodingOffset = offset + task->data.headerLen;
offset += task->data.size;
}
@@ -1845,6 +1847,8 @@ void clRiceEncoding(
if (tid < sizeof(task) / sizeof(int))
((__local int*)&task)[tid] = ((__global int*)(&tasks[get_group_id(0)]))[tid];
barrier(CLK_LOCAL_MEM_FENCE);
if (task.type != Fixed && task.type != LPC)
return;
if (tid == 0)
mypos[GROUP_SIZE] = 0;
if (tid < WARP_SIZE)