stability issues

This commit is contained in:
chudov
2010-11-25 09:10:35 +00:00
parent ce99355a43
commit 8857259a5f
4 changed files with 109 additions and 13 deletions

View File

@@ -72,7 +72,7 @@ namespace CUETools.Codecs.FLACCL
public int GroupSize { get; set; } public int GroupSize { get; set; }
[DefaultValue(32)] [DefaultValue(32)]
[SRDescription(typeof(Properties.Resources), "DescriptionTashSize")] [SRDescription(typeof(Properties.Resources), "DescriptionTaskSize")]
public int TaskSize { get; set; } public int TaskSize { get; set; }
[SRDescription(typeof(Properties.Resources), "DescriptionDefines")] [SRDescription(typeof(Properties.Resources), "DescriptionDefines")]
@@ -845,18 +845,17 @@ namespace CUETools.Codecs.FLACCL
if (_settings.GPUOnly && _settings.DoRice) if (_settings.GPUOnly && _settings.DoRice)
{ {
int len = task.BestResidualTasks[index].size - task.BestResidualTasks[index].headerLen;
int pos = task.BestResidualTasks[index].encodingOffset;
if (task.BestResidualTasks[index].size != (int)sub.best.size) if (task.BestResidualTasks[index].size != (int)sub.best.size)
throw new Exception("Encoding offset mismatch"); throw new Exception("Encoding offset mismatch");
if (task.BestResidualTasks[index].headerLen != offs0 + 6) if (task.BestResidualTasks[index].headerLen != offs0 + 6)
throw new Exception("Encoding offset mismatch"); throw new Exception("Encoding offset mismatch");
if (task.BestResidualTasks[index].encodingOffset != frame.writer.BitLength) if (pos % 8 != frame.writer.BitLength % 8)
throw new Exception("Encoding offset mismatch"); throw new Exception("Encoding offset mismatch");
int len = task.BestResidualTasks[index].size - task.BestResidualTasks[index].headerLen;
//Console.WriteLine("{0:x} => {1:x}", _totalSize + frame.writer.BitLength / 8, _totalSize + (frame.writer.BitLength + len) / 8); //Console.WriteLine("{0:x} => {1:x}", _totalSize + frame.writer.BitLength / 8, _totalSize + (frame.writer.BitLength + len) / 8);
// task.BestResidualTasks[index].headerLen // task.BestResidualTasks[index].headerLen
frame.writer.writeints(len, (byte*)task.clRiceOutputPtr); frame.writer.writeints(len, pos, (byte*)task.clRiceOutputPtr);
if (task.BestResidualTasks[index].encodingOffset + len != frame.writer.BitLength)
throw new Exception("Encoding offset mismatch");
} }
else else
{ {
@@ -1212,8 +1211,15 @@ namespace CUETools.Codecs.FLACCL
} }
#endif #endif
if (((csum << task.frame.subframes[ch].obits) >= 1UL << 32 && !_settings.DoRice) || !_settings.GPUOnly) if (((csum << task.frame.subframes[ch].obits) >= 1UL << 32) || !_settings.GPUOnly)
{ {
if (_settings.GPUOnly && _settings.DoRice)
#if DEBUG
// throw new Exception("DoRice failed");
break;
#else
break;
#endif
if (!unpacked) unpack_samples(task, task.frameSize); unpacked = true; if (!unpacked) unpack_samples(task, task.frameSize); unpacked = true;
if ((csum << task.frame.subframes[ch].obits) >= 1UL << 32) if ((csum << task.frame.subframes[ch].obits) >= 1UL << 32)
lpc.encode_residual_long(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples, task.frame.blocksize, task.frame.subframes[ch].best.order, coefs, task.frame.subframes[ch].best.shift); lpc.encode_residual_long(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples, task.frame.blocksize, task.frame.subframes[ch].best.order, coefs, task.frame.subframes[ch].best.shift);
@@ -1255,7 +1261,7 @@ namespace CUETools.Codecs.FLACCL
unsafe void select_best_methods(FlacFrame frame, int channelsCount, int iFrame, FLACCLTask task) unsafe void select_best_methods(FlacFrame frame, int channelsCount, int iFrame, FLACCLTask task)
{ {
if (channelsCount == 4 && channels == 2) if (channelsCount == 4 && channels == 2 && frame.blocksize > 4)
{ {
if (task.BestResidualTasks[iFrame * 2].channel == 0 && task.BestResidualTasks[iFrame * 2 + 1].channel == 1) if (task.BestResidualTasks[iFrame * 2].channel == 0 && task.BestResidualTasks[iFrame * 2 + 1].channel == 1)
frame.ch_mode = ChannelMode.LeftRight; frame.ch_mode = ChannelMode.LeftRight;
@@ -1321,7 +1327,7 @@ namespace CUETools.Codecs.FLACCL
unsafe void estimate_residual(FLACCLTask task, int channelsCount) unsafe void estimate_residual(FLACCLTask task, int channelsCount)
{ {
if (task.frameSize >= 4) if (task.frameSize > 4)
task.EnqueueKernels(); task.EnqueueKernels();
} }
@@ -1605,8 +1611,9 @@ namespace CUETools.Codecs.FLACCL
} }
OCLMan.CreateDefaultContext(platformId, (DeviceType)_settings.DeviceType); OCLMan.CreateDefaultContext(platformId, (DeviceType)_settings.DeviceType);
bool haveAtom = false;
if (OCLMan.Context.Devices[0].Extensions.Contains("cl_khr_local_int32_extended_atomics")) if (OCLMan.Context.Devices[0].Extensions.Contains("cl_khr_local_int32_extended_atomics"))
_settings.Defines += "#define HAVE_ATOM\n"; haveAtom = true;
else else
_settings.GPUOnly = false; _settings.GPUOnly = false;
@@ -1618,10 +1625,11 @@ namespace CUETools.Codecs.FLACCL
"#define FLACCL_VERSION \"" + vendor_string + "\"\n" + "#define FLACCL_VERSION \"" + vendor_string + "\"\n" +
(_settings.GPUOnly ? "#define DO_PARTITIONS\n" : "") + (_settings.GPUOnly ? "#define DO_PARTITIONS\n" : "") +
(_settings.DoRice ? "#define DO_RICE\n" : "") + (_settings.DoRice ? "#define DO_RICE\n" : "") +
(haveAtom ? "#define HAVE_ATOM\n" : "") +
#if DEBUG #if DEBUG
"#define DEBUG\n" + "#define DEBUG\n" +
#endif #endif
(_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") + (_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") +
_settings.Defines + "\n"; _settings.Defines + "\n";
try try

View File

@@ -1,7 +1,7 @@
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// <auto-generated> // <auto-generated>
// This code was generated by a tool. // This code was generated by a tool.
// Runtime Version:2.0.50727.4200 // Runtime Version:2.0.50727.4206
// //
// Changes to this file may cause incorrect behavior and will be lost if // Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated. // the code is regenerated.
@@ -69,6 +69,33 @@ namespace CUETools.Codecs.FLACCL.Properties {
} }
} }
/// <summary>
/// Looks up a localized string similar to Additional preprocessor definitions for OpenCL kernel.
/// </summary>
internal static string DescriptionDefines {
get {
return ResourceManager.GetString("DescriptionDefines", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Use CPU or GPU device.
/// </summary>
internal static string DescriptionDeviceType {
get {
return ResourceManager.GetString("DescriptionDeviceType", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Do final encoding stages on GPU (experimental).
/// </summary>
internal static string DescriptionDoRice {
get {
return ResourceManager.GetString("DescriptionDoRice", resourceCulture);
}
}
/// <summary> /// <summary>
/// Looks up a localized string similar to Use GPU on all stages. /// Looks up a localized string similar to Use GPU on all stages.
/// </summary> /// </summary>
@@ -78,6 +105,42 @@ namespace CUETools.Codecs.FLACCL.Properties {
} }
} }
/// <summary>
/// Looks up a localized string similar to GPU thread block size (64, 128, 256).
/// </summary>
internal static string DescriptionGroupSize {
get {
return ResourceManager.GetString("DescriptionGroupSize", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Device uses host memory (Don&apos;t use).
/// </summary>
internal static string DescriptionMappedMemory {
get {
return ResourceManager.GetString("DescriptionMappedMemory", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to OpenCL platform to use (ATI Stream, NVIDIA OpenCL, Intel OpenCL, etc).
/// </summary>
internal static string DescriptionPlatform {
get {
return ResourceManager.GetString("DescriptionPlatform", resourceCulture);
}
}
/// <summary>
/// Looks up a localized string similar to Number of frames processed simultaneously (32, 64).
/// </summary>
internal static string DescriptionTaskSize {
get {
return ResourceManager.GetString("DescriptionTaskSize", resourceCulture);
}
}
/// <summary> /// <summary>
/// Looks up a localized string similar to Calculate MD5 hash for audio stream. /// Looks up a localized string similar to Calculate MD5 hash for audio stream.
/// </summary> /// </summary>

View File

@@ -120,9 +120,30 @@
<data name="DescriptionCPUThreads" xml:space="preserve"> <data name="DescriptionCPUThreads" xml:space="preserve">
<value>Use additional CPU threads</value> <value>Use additional CPU threads</value>
</data> </data>
<data name="DescriptionDefines" xml:space="preserve">
<value>Additional preprocessor definitions for OpenCL kernel</value>
</data>
<data name="DescriptionDeviceType" xml:space="preserve">
<value>Use CPU or GPU device</value>
</data>
<data name="DescriptionDoRice" xml:space="preserve">
<value>Do final encoding stages on GPU (experimental)</value>
</data>
<data name="DescriptionGPUOnly" xml:space="preserve"> <data name="DescriptionGPUOnly" xml:space="preserve">
<value>Use GPU on all stages</value> <value>Use GPU on all stages</value>
</data> </data>
<data name="DescriptionGroupSize" xml:space="preserve">
<value>GPU thread block size (64, 128, 256)</value>
</data>
<data name="DescriptionMappedMemory" xml:space="preserve">
<value>Device uses host memory (Don't use)</value>
</data>
<data name="DescriptionPlatform" xml:space="preserve">
<value>OpenCL platform to use (ATI Stream, NVIDIA OpenCL, Intel OpenCL, etc)</value>
</data>
<data name="DescriptionTaskSize" xml:space="preserve">
<value>Number of frames processed simultaneously (32, 64)</value>
</data>
<data name="DoMD5Description" xml:space="preserve"> <data name="DoMD5Description" xml:space="preserve">
<value>Calculate MD5 hash for audio stream</value> <value>Calculate MD5 hash for audio stream</value>
</data> </data>

View File

@@ -1723,6 +1723,8 @@ void clCalcOutputOffsets(
{ {
__global FLACCLSubframeTask* task = tasks + iFrame * channels + ch; __global FLACCLSubframeTask* task = tasks + iFrame * channels + ch;
offset += 8 + task->data.wbits; offset += 8 + task->data.wbits;
// Add 64 bits of padding to keep frames separated; if a header is too small they could otherwise intersect
offset += 64;
task->data.encodingOffset = offset + task->data.headerLen; task->data.encodingOffset = offset + task->data.headerLen;
offset += task->data.size; offset += task->data.size;
} }
@@ -1845,6 +1847,8 @@ void clRiceEncoding(
if (tid < sizeof(task) / sizeof(int)) if (tid < sizeof(task) / sizeof(int))
((__local int*)&task)[tid] = ((__global int*)(&tasks[get_group_id(0)]))[tid]; ((__local int*)&task)[tid] = ((__global int*)(&tasks[get_group_id(0)]))[tid];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (task.type != Fixed && task.type != LPC)
return;
if (tid == 0) if (tid == 0)
mypos[GROUP_SIZE] = 0; mypos[GROUP_SIZE] = 0;
if (tid < WARP_SIZE) if (tid < WARP_SIZE)