mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
testing on CPU
This commit is contained in:
@@ -2217,6 +2217,7 @@ namespace CUETools.Codecs.FLACCL
|
||||
public Kernel clComputeLPC;
|
||||
//public Kernel cudaComputeLPCLattice;
|
||||
public Kernel clQuantizeLPC;
|
||||
public Kernel clSelectStereoTasks;
|
||||
public Kernel clEstimateResidual;
|
||||
public Kernel clChooseBestMethod;
|
||||
public Kernel clCopyBestMethod;
|
||||
@@ -2372,6 +2373,7 @@ namespace CUETools.Codecs.FLACCL
|
||||
clComputeLPC = openCLProgram.CreateKernel("clComputeLPC");
|
||||
clQuantizeLPC = openCLProgram.CreateKernel("clQuantizeLPC");
|
||||
//cudaComputeLPCLattice = openCLProgram.CreateKernel("clComputeLPCLattice");
|
||||
clSelectStereoTasks = openCLProgram.CreateKernel("clSelectStereoTasks");
|
||||
clEstimateResidual = openCLProgram.CreateKernel("clEstimateResidual");
|
||||
clChooseBestMethod = openCLProgram.CreateKernel("clChooseBestMethod");
|
||||
clCopyBestMethod = openCLProgram.CreateKernel("clCopyBestMethod");
|
||||
@@ -2421,6 +2423,7 @@ namespace CUETools.Codecs.FLACCL
|
||||
clComputeLPC.Dispose();
|
||||
clQuantizeLPC.Dispose();
|
||||
//cudaComputeLPCLattice.Dispose();
|
||||
clSelectStereoTasks.Dispose();
|
||||
clEstimateResidual.Dispose();
|
||||
clChooseBestMethod.Dispose();
|
||||
clCopyBestMethod.Dispose();
|
||||
@@ -2524,9 +2527,9 @@ namespace CUETools.Codecs.FLACCL
|
||||
clChannelDecorr.SetArgs(
|
||||
clSamples,
|
||||
clSamplesBytes,
|
||||
FLACCLWriter.MAX_BLOCKSIZE);
|
||||
FLACCLWriter.MAX_BLOCKSIZE/4);
|
||||
|
||||
openCLCQ.EnqueueNDRangeKernel(clChannelDecorr, 0, frameSize * frameCount);
|
||||
openCLCQ.EnqueueNDRangeKernel(clChannelDecorr, 0, frameSize * frameCount / 4);
|
||||
|
||||
if (eparams.do_wasted)
|
||||
{
|
||||
@@ -2723,7 +2726,6 @@ namespace CUETools.Codecs.FLACCL
|
||||
}
|
||||
}
|
||||
|
||||
#if HJHKHJ
|
||||
public static class OpenCLExtensions
|
||||
{
|
||||
public static void SetArgs(this Kernel kernel, params object[] args)
|
||||
@@ -2754,5 +2756,4 @@ namespace CUETools.Codecs.FLACCL
|
||||
queue.EnqueueNDRangeKernel(kernel, 2, null, new long[] { localSizeX * globalSizeX, localSizeY * globalSizeY }, new long[] { localSizeX, localSizeY });
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -52,7 +52,8 @@ typedef struct
|
||||
int wbits;
|
||||
int abits;
|
||||
int porder;
|
||||
int reserved[2];
|
||||
int ignore;
|
||||
int reserved;
|
||||
} FLACCLSubframeData;
|
||||
|
||||
typedef struct
|
||||
@@ -62,34 +63,60 @@ typedef struct
|
||||
} FLACCLSubframeTask;
|
||||
|
||||
__kernel void clStereoDecorr(
|
||||
__global int *samples,
|
||||
__global short2 *src,
|
||||
__global int4 *samples,
|
||||
__global int4 *src,
|
||||
int offset
|
||||
)
|
||||
{
|
||||
int pos = get_global_id(0);
|
||||
if (pos < offset)
|
||||
{
|
||||
short2 s = src[pos];
|
||||
samples[pos] = s.x;
|
||||
samples[1 * offset + pos] = s.y;
|
||||
samples[2 * offset + pos] = (s.x + s.y) >> 1;
|
||||
samples[3 * offset + pos] = s.x - s.y;
|
||||
int4 s = src[pos];
|
||||
int4 x = (s << 16) >> 16;
|
||||
int4 y = s >> 16;
|
||||
samples[pos] = x;
|
||||
samples[1 * offset + pos] = y;
|
||||
samples[2 * offset + pos] = (x + y) >> 1;
|
||||
samples[3 * offset + pos] = x - y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void clWindowRectangle(__global float* window, int windowOffset)
|
||||
{
|
||||
window[get_global_id(0)] = 1.0f;
|
||||
}
|
||||
|
||||
__kernel void clWindowFlattop(__global float* window, int windowOffset)
|
||||
{
|
||||
float p = M_PI * get_global_id(0) / (get_global_size(0) - 1);
|
||||
window[get_global_id(0)] = 1.0f
|
||||
- 1.93f * cos(2 * p)
|
||||
+ 1.29f * cos(4 * p)
|
||||
- 0.388f * cos(6 * p)
|
||||
+ 0.0322f * cos(8 * p);
|
||||
}
|
||||
|
||||
__kernel void clWindowTukey(__global float* window, int windowOffset, float p)
|
||||
{
|
||||
int Np = (int)(p / 2.0f * get_global_size(0)) - 1;
|
||||
int n = select(max(Np, get_global_id(0) - (get_global_size(0) - Np - 1) + Np), get_global_id(0), get_global_id(0) <= Np);
|
||||
window[get_global_id(0)] = 0.5f - 0.5f * cos(M_PI * n / Np);
|
||||
}
|
||||
|
||||
__kernel void clChannelDecorr2(
|
||||
__global int *samples,
|
||||
__global short2 *src,
|
||||
__global int4 *samples,
|
||||
__global int4 *src,
|
||||
int offset
|
||||
)
|
||||
{
|
||||
int pos = get_global_id(0);
|
||||
if (pos < offset)
|
||||
{
|
||||
short2 s = src[pos];
|
||||
samples[pos] = s.x;
|
||||
samples[1 * offset + pos] = s.y;
|
||||
int4 s = src[pos];
|
||||
int4 x = (s << 16) >> 16;
|
||||
int4 y = s >> 16;
|
||||
samples[pos] = x;
|
||||
samples[1 * offset + pos] = y;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,6 +155,8 @@ void clFindWastedBits(
|
||||
{
|
||||
ptask[i].data.wbits = w;
|
||||
ptask[i].data.abits = a;
|
||||
ptask[i].data.ignore = 0;//i != 0;
|
||||
ptask[i].data.size = ptask[i].data.obits * ptask[i].data.blocksize;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,6 +442,20 @@ inline int calc_residual(__global int *ptr, int * coefs, int ro)
|
||||
default: ENCODE_N(ro, action) \
|
||||
}
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(1, 1, 1)))
|
||||
void clSelectStereoTasks(
|
||||
__global FLACCLSubframeTask *tasks,
|
||||
int count
|
||||
)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
__global FLACCLSubframeTask* ptask = tasks + count * get_group_id(0) + i;
|
||||
ptask->data.ignore = i != 0;
|
||||
ptask->data.size = ptask->data.obits * ptask->data.blocksize;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel /*__attribute__(( vec_type_hint (int4)))*/ __attribute__((reqd_work_group_size(1, 1, 1)))
|
||||
void clEstimateResidual(
|
||||
__global int*samples,
|
||||
@@ -425,6 +468,12 @@ void clEstimateResidual(
|
||||
#define EPO 6
|
||||
int len[1 << EPO]; // blocksize / 64!!!!
|
||||
|
||||
if (task.data.ignore)
|
||||
{
|
||||
tasks[get_group_id(0)].data.size = task.data.obits * bs;
|
||||
return;
|
||||
}
|
||||
|
||||
__global int *data = &samples[task.data.samplesOffs];
|
||||
// for (int i = ro; i < 32; i++)
|
||||
//task.coefs[i] = 0;
|
||||
@@ -579,13 +628,15 @@ void clCalcPartition16(
|
||||
int max_porder // <= 8
|
||||
)
|
||||
{
|
||||
FLACCLSubframeTask task = tasks[get_group_id(0)];
|
||||
FLACCLSubframeTask task = tasks[get_global_id(0)];
|
||||
int bs = task.data.blocksize;
|
||||
int ro = task.data.residualOrder;
|
||||
__global int *data = &samples[task.data.samplesOffs];
|
||||
__global int *pl = partition_lengths + (1 << (max_porder + 1)) * get_group_id(0);
|
||||
__global int *pl = partition_lengths + (1 << (max_porder + 1)) * get_global_id(0);
|
||||
for (int p = 0; p < (1 << max_porder); p++)
|
||||
pl[p] = 0;
|
||||
//__global int *rptr = residual + task.data.residualOffs;
|
||||
//SWITCH_N((rptr[pos] = t, pl[pos >> 4] += (t << 1) ^ (t >> 31)));
|
||||
SWITCH_N((residual[task.data.residualOffs + pos] = t, t = clamp(t, -0x7fffff, 0x7fffff), t = (t << 1) ^ (t >> 31), pl[pos >> 4] += t));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user