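optimizations: size the residual and rice buffers by channels instead of channelsCount; replace nested if guards around atom_or in clRiceEncoding with select()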

This commit is contained in:
chudov
2010-11-14 20:48:27 +00:00
parent 050140aa7f
commit 3f830ec184
2 changed files with 19 additions and 26 deletions

View File

@@ -2399,7 +2399,7 @@ namespace CUETools.Codecs.FLACCL
residualTasksLen = sizeof(FLACCLSubframeTask) * 32 * channelsCount * FLACCLWriter.maxFrames; residualTasksLen = sizeof(FLACCLSubframeTask) * 32 * channelsCount * FLACCLWriter.maxFrames;
bestResidualTasksLen = sizeof(FLACCLSubframeTask) * channels * FLACCLWriter.maxFrames; bestResidualTasksLen = sizeof(FLACCLSubframeTask) * channels * FLACCLWriter.maxFrames;
int samplesBufferLen = sizeof(int) * FLACCLWriter.MAX_BLOCKSIZE * channelsCount; int samplesBufferLen = sizeof(int) * FLACCLWriter.MAX_BLOCKSIZE * channelsCount;
int residualBufferLen = sizeof(int) * FLACCLWriter.MAX_BLOCKSIZE * channelsCount; // *channels! but need to adjust residualOffser int residualBufferLen = sizeof(int) * FLACCLWriter.MAX_BLOCKSIZE * channels; // need to adjust residualOffset?
int partitionsLen = sizeof(int) * (30 << 8) * channels * FLACCLWriter.maxFrames; int partitionsLen = sizeof(int) * (30 << 8) * channels * FLACCLWriter.maxFrames;
int riceParamsLen = sizeof(int) * (4 << 8) * channels * FLACCLWriter.maxFrames; int riceParamsLen = sizeof(int) * (4 << 8) * channels * FLACCLWriter.maxFrames;
int autocorLen = sizeof(float) * (MAX_ORDER + 1) * lpc.MAX_LPC_WINDOWS * channelsCount * FLACCLWriter.maxFrames; int autocorLen = sizeof(float) * (MAX_ORDER + 1) * lpc.MAX_LPC_WINDOWS * channelsCount * FLACCLWriter.maxFrames;
@@ -2407,7 +2407,7 @@ namespace CUETools.Codecs.FLACCL
int resOutLen = sizeof(int) * channelsCount * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8) * FLACCLWriter.maxFrames; int resOutLen = sizeof(int) * channelsCount * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8) * FLACCLWriter.maxFrames;
int wndLen = sizeof(float) * FLACCLWriter.MAX_BLOCKSIZE /** 2*/ * lpc.MAX_LPC_WINDOWS; int wndLen = sizeof(float) * FLACCLWriter.MAX_BLOCKSIZE /** 2*/ * lpc.MAX_LPC_WINDOWS;
int selectedLen = sizeof(int) * 32 * channelsCount * FLACCLWriter.maxFrames; int selectedLen = sizeof(int) * 32 * channelsCount * FLACCLWriter.maxFrames;
int riceLen = sizeof(int) * channelsCount * FLACCLWriter.MAX_BLOCKSIZE; int riceLen = sizeof(int) * channels * FLACCLWriter.MAX_BLOCKSIZE;
if (!writer._settings.MappedMemory) if (!writer._settings.MappedMemory)
{ {

View File

@@ -1780,32 +1780,25 @@ void clRiceEncoding(
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
//if ((get_global_id(0) == 64 || get_global_id(0) == 63) && pos == 0) //if ((get_global_id(0) == 64 || get_global_id(0) == 63) && pos == 0)
// printf("v=%x,k=%d,mylen=%d,mypos=%d,pstart=%d,partlen=%d\n", v, k, mylen, mypos[tid], pstart, partlen); // printf("v=%x,k=%d,mylen=%d,mypos=%d,pstart=%d,partlen=%d\n", v, k, mylen, mypos[tid], pstart, partlen);
if (mylen > 0) if (pstart && mylen)
{ {
if (pstart) int kpos = mypos[tid] - mylen;
{ int kpos0 = (kpos >> 5) - start32;
int kpos = mypos[tid] - mylen; int kpos1 = kpos & 31;
unsigned int kval = (k << 28); unsigned int kval = k << 28;
// if (get_group_id(0) == 0 && kpos / 32 - task.encodingOffset / 32 == 5 && pos == 0) unsigned int kval0 = kval >> kpos1;
//printf("{%08X |= %08X}\n", data[kpos / 32 - start32], kval >> (kpos & 31)); unsigned int kval1 = select(0, kval << (32 - kpos1), kpos1);
atom_or(&data[kpos / 32 - start32], kval >> (kpos & 31)); atom_or(&data[kpos0], kval0);
if ((kpos & 31) != 0) atom_or(&data[kpos0 + 1], kval1);
atom_or(&data[kpos / 32 - start32 + 1], kval << (32 - (kpos & 31)));
}
int qpos = mypos[tid] - k - 1;
unsigned int qval = (1U << 31) | (v << (31 - k));
//if (get_group_id(0) == 0 && qpos / 32 - task.encodingOffset / 32 == 5 && pos == 0)
// printf("(%08X |= %08X) tid == %d, qpos == %d, qval == %08X\n", data[qpos / 32 - start32], qval >> (qpos & 31), tid, qpos, qval);
// if (get_group_id(0) == 0 && pos == 0)
// {
// printf("[%08X] (%08X |= %08X) qval==%08x qpos==%08x\n", qpos / 32 - start32, data[qpos / 32 - start32], qval >> (qpos & 31), qval, qpos);
//if (qval << (32 - (qpos & 31)) != 0)
// printf("[%08X] (%08X |= %08X)\n", qpos / 32 - start32 + 1, data[qpos / 32 - start32+1], qval << (32 - (qpos & 31)));
// }
atom_or(&data[qpos / 32 - start32], qval >> (qpos & 31));
if ((qpos & 31) != 0)
atom_or(&data[qpos / 32 - start32 + 1], qval << (32 - (qpos & 31)));
} }
int qpos = mypos[tid] - k - 1;
int qpos0 = (qpos >> 5) - start32;
int qpos1 = qpos & 31;
unsigned int qval = select(0, (1U << 31) | (v << (31 - k)), mylen);
unsigned int qval0 = qval >> qpos1;
unsigned int qval1= select(0, qval << (32 - qpos1), qpos1);
atom_or(&data[qpos0], qval0);
atom_or(&data[qpos0 + 1], qval1);
if (tid == GROUP_SIZE - 1) if (tid == GROUP_SIZE - 1)
start = mypos[tid]; start = mypos[tid];
//if (get_group_id(0) == 0 && pos == 0) //if (get_group_id(0) == 0 && pos == 0)