FLACCL: fix broken output files produced when using -11 --fast-gpu

This commit is contained in:
Grigory Chudov
2013-10-15 00:22:41 -04:00
parent 1aeed2e7a8
commit 0855fbfaae

View File

@@ -796,7 +796,7 @@ void clQuantizeLPC(
int cbits = min(51 - 2 * clz(shared.task.blocksize), shared.task.abits) - minprecision + (i - ((i >> precisions) << precisions));
#if BITS_PER_SAMPLE <= 16
// Limit cbits so that 32-bit arithmetic will be enough when calculating the residual
cbits = min(cbits, clz(order) + 1 - shared.task.obits);
cbits = min(cbits, clz(order + 1) + 1 - shared.task.obits);
#endif
cbits = clamp(cbits, 3, 15);
@@ -1584,7 +1584,7 @@ void clCalcPartition32(
barrier(CLK_LOCAL_MEM_FENCE);
// we must ensure that psize * (t >> k) doesn't overflow;
uint4 lim = 0x07ffffffU;
uint4 lim = 0x03ffffffU;
int x = tid >> 5;
__local uint * chunk = &res[x << 5];
// calc number of unary bits for each group of 32 residual samples
@@ -1984,7 +1984,6 @@ void clCalcOutputOffsets(
int firstFrame
)
{
const int channels = 2;
__local FLACCLSubframeData ltasks[MAX_CHANNELS];
__local volatile int mypos[MAX_CHANNELS];
int offset = 0;
@@ -2153,15 +2152,17 @@ void clRiceEncoding(
{
int offs = pos + tid;
int iv = residual[task.residualOffs + offs];
int part = offs / plen; // >> plenoffs;
int part = offs / plen;
//int part = offs >> plenoffs;
#if 0
int k = brp[part];
#else
int k = best_rice_parameters[(get_group_id(0) << max_porder) + part];
#endif
int pstart = offs == task.residualOrder || offs == part * plen;
int pstart = offs == part * plen;
//int pstart = offs == part << plenoffs;
uint v = (iv << 1) ^ (iv >> 31);
int mylen = select(0, (int)(v >> k) + 1 + k + select(0, RICE_PARAM_BITS, pstart), offs >= task.residualOrder && offs < bs);
int mylen = select(0, (int)(v >> k) + 1 + k, offs >= task.residualOrder && offs < bs) + select(0, RICE_PARAM_BITS, pstart);
mypos[tid] = mylen;
// Inclusive scan(+)
@@ -2187,8 +2188,6 @@ void clRiceEncoding(
// printf("Oops: %d\n", mypos[tid]);
data[tid] = select(0U, remainder, tid == 0);
barrier(CLK_LOCAL_MEM_FENCE);
if (mylen)
{
if (pstart)
{
int kpos = mp - mylen;
@@ -2200,6 +2199,8 @@ void clRiceEncoding(
if (kval0) atomic_or(&data[kpos0], kval0);
if (kpos1 && kval1) atomic_or(&data[kpos0 + 1], kval1);
}
if (offs >= task.residualOrder && offs < bs)
{
int qpos = mp - k - 1;
int qpos0 = (qpos >> 5) - start32;
int qpos1 = qpos & 31;
@@ -2221,9 +2222,9 @@ void clRiceEncoding(
int part = offs / plen; // >> plenoffs;
//int k = brp[min(255, part)];
int k = offs < bs ? best_rice_parameters[(get_group_id(0) << max_porder) + part] : 0;
int pstart = offs == task.residualOrder || offs == part * plen;
int pstart = offs == part * plen;
uint v = (iv << 1) ^ (iv >> 31);
int mylen = select(0, (int)(v >> k) + 1 + k + select(0, RICE_PARAM_BITS, pstart), offs >= task.residualOrder && offs < bs);
int mylen = select(0, (int)(v >> k) + 1 + k, offs >= task.residualOrder && offs < bs) + select(0, RICE_PARAM_BITS, pstart);
mypos[tid] = mylen;
// Inclusive scan(+)
@@ -2248,8 +2249,6 @@ void clRiceEncoding(
// printf("Oops: %d\n", mypos[tid]);
data[tid] = select(0U, remainder, tid == 0);
barrier(CLK_LOCAL_MEM_FENCE);
if (mylen)
{
if (pstart)
{
int kpos = mp - mylen;
@@ -2261,6 +2260,8 @@ void clRiceEncoding(
if (kval0) atomic_or(&data[kpos0], kval0);
if (kpos1 && kval1) atomic_or(&data[kpos0 + 1], kval1);
}
if (offs >= task.residualOrder && offs < bs)
{
int qpos = mp - k - 1;
int qpos0 = (qpos >> 5) - start32;
int qpos1 = qpos & 31;