mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
testing on CPU
This commit is contained in:
@@ -2217,6 +2217,7 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
public Kernel clComputeLPC;
|
public Kernel clComputeLPC;
|
||||||
//public Kernel cudaComputeLPCLattice;
|
//public Kernel cudaComputeLPCLattice;
|
||||||
public Kernel clQuantizeLPC;
|
public Kernel clQuantizeLPC;
|
||||||
|
public Kernel clSelectStereoTasks;
|
||||||
public Kernel clEstimateResidual;
|
public Kernel clEstimateResidual;
|
||||||
public Kernel clChooseBestMethod;
|
public Kernel clChooseBestMethod;
|
||||||
public Kernel clCopyBestMethod;
|
public Kernel clCopyBestMethod;
|
||||||
@@ -2372,6 +2373,7 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
clComputeLPC = openCLProgram.CreateKernel("clComputeLPC");
|
clComputeLPC = openCLProgram.CreateKernel("clComputeLPC");
|
||||||
clQuantizeLPC = openCLProgram.CreateKernel("clQuantizeLPC");
|
clQuantizeLPC = openCLProgram.CreateKernel("clQuantizeLPC");
|
||||||
//cudaComputeLPCLattice = openCLProgram.CreateKernel("clComputeLPCLattice");
|
//cudaComputeLPCLattice = openCLProgram.CreateKernel("clComputeLPCLattice");
|
||||||
|
clSelectStereoTasks = openCLProgram.CreateKernel("clSelectStereoTasks");
|
||||||
clEstimateResidual = openCLProgram.CreateKernel("clEstimateResidual");
|
clEstimateResidual = openCLProgram.CreateKernel("clEstimateResidual");
|
||||||
clChooseBestMethod = openCLProgram.CreateKernel("clChooseBestMethod");
|
clChooseBestMethod = openCLProgram.CreateKernel("clChooseBestMethod");
|
||||||
clCopyBestMethod = openCLProgram.CreateKernel("clCopyBestMethod");
|
clCopyBestMethod = openCLProgram.CreateKernel("clCopyBestMethod");
|
||||||
@@ -2421,6 +2423,7 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
clComputeLPC.Dispose();
|
clComputeLPC.Dispose();
|
||||||
clQuantizeLPC.Dispose();
|
clQuantizeLPC.Dispose();
|
||||||
//cudaComputeLPCLattice.Dispose();
|
//cudaComputeLPCLattice.Dispose();
|
||||||
|
clSelectStereoTasks.Dispose();
|
||||||
clEstimateResidual.Dispose();
|
clEstimateResidual.Dispose();
|
||||||
clChooseBestMethod.Dispose();
|
clChooseBestMethod.Dispose();
|
||||||
clCopyBestMethod.Dispose();
|
clCopyBestMethod.Dispose();
|
||||||
@@ -2524,9 +2527,9 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
clChannelDecorr.SetArgs(
|
clChannelDecorr.SetArgs(
|
||||||
clSamples,
|
clSamples,
|
||||||
clSamplesBytes,
|
clSamplesBytes,
|
||||||
FLACCLWriter.MAX_BLOCKSIZE);
|
FLACCLWriter.MAX_BLOCKSIZE/4);
|
||||||
|
|
||||||
openCLCQ.EnqueueNDRangeKernel(clChannelDecorr, 0, frameSize * frameCount);
|
openCLCQ.EnqueueNDRangeKernel(clChannelDecorr, 0, frameSize * frameCount / 4);
|
||||||
|
|
||||||
if (eparams.do_wasted)
|
if (eparams.do_wasted)
|
||||||
{
|
{
|
||||||
@@ -2723,7 +2726,6 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HJHKHJ
|
|
||||||
public static class OpenCLExtensions
|
public static class OpenCLExtensions
|
||||||
{
|
{
|
||||||
public static void SetArgs(this Kernel kernel, params object[] args)
|
public static void SetArgs(this Kernel kernel, params object[] args)
|
||||||
@@ -2754,5 +2756,4 @@ namespace CUETools.Codecs.FLACCL
|
|||||||
queue.EnqueueNDRangeKernel(kernel, 2, null, new long[] { localSizeX * globalSizeX, localSizeY * globalSizeY }, new long[] { localSizeX, localSizeY });
|
queue.EnqueueNDRangeKernel(kernel, 2, null, new long[] { localSizeX * globalSizeX, localSizeY * globalSizeY }, new long[] { localSizeX, localSizeY });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,7 +52,8 @@ typedef struct
|
|||||||
int wbits;
|
int wbits;
|
||||||
int abits;
|
int abits;
|
||||||
int porder;
|
int porder;
|
||||||
int reserved[2];
|
int ignore;
|
||||||
|
int reserved;
|
||||||
} FLACCLSubframeData;
|
} FLACCLSubframeData;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
@@ -62,34 +63,60 @@ typedef struct
|
|||||||
} FLACCLSubframeTask;
|
} FLACCLSubframeTask;
|
||||||
|
|
||||||
__kernel void clStereoDecorr(
|
__kernel void clStereoDecorr(
|
||||||
__global int *samples,
|
__global int4 *samples,
|
||||||
__global short2 *src,
|
__global int4 *src,
|
||||||
int offset
|
int offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
int pos = get_global_id(0);
|
int pos = get_global_id(0);
|
||||||
if (pos < offset)
|
if (pos < offset)
|
||||||
{
|
{
|
||||||
short2 s = src[pos];
|
int4 s = src[pos];
|
||||||
samples[pos] = s.x;
|
int4 x = (s << 16) >> 16;
|
||||||
samples[1 * offset + pos] = s.y;
|
int4 y = s >> 16;
|
||||||
samples[2 * offset + pos] = (s.x + s.y) >> 1;
|
samples[pos] = x;
|
||||||
samples[3 * offset + pos] = s.x - s.y;
|
samples[1 * offset + pos] = y;
|
||||||
|
samples[2 * offset + pos] = (x + y) >> 1;
|
||||||
|
samples[3 * offset + pos] = x - y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rectangular window: every work-item stores the constant 1.0f into the
// window slot selected by its global id.
// NOTE(review): windowOffset is accepted but never read here; confirm the
// host does not expect the kernel to apply it.
__kernel void clWindowRectangle(__global float* window, int windowOffset)
{
    const size_t slot = get_global_id(0);
    window[slot] = 1.0f;
}
|
||||||
|
|
||||||
|
// Flat-top window: each work-item computes one coefficient; the window
// length is the global work size and the output index is the global id.
//   w[n] = 1 - 1.93 cos(2p) + 1.29 cos(4p) - 0.388 cos(6p) + 0.0322 cos(8p)
//   with p = pi * n / (N - 1)
// NOTE(review): windowOffset is accepted but never read here; confirm the
// host does not expect the kernel to apply it.
// NOTE(review): a global size of 1 makes (N - 1) zero and yields NaN.
__kernel void clWindowFlattop(__global float* window, int windowOffset)
{
    const size_t pos = get_global_id(0);
    // M_PI is a double constant (needs fp64 support); M_PI_F keeps the whole
    // expression in single precision so the kernel builds on float-only devices.
    const float p = M_PI_F * (float)pos / (float)(get_global_size(0) - 1);
    window[pos] = 1.0f
        - 1.93f * cos(2 * p)
        + 1.29f * cos(4 * p)
        - 0.388f * cos(6 * p)
        + 0.0322f * cos(8 * p);
}
|
||||||
|
|
||||||
|
// Tukey (tapered cosine) window: each work-item computes one coefficient;
// the window length is the global work size and p is the taper fraction.
// With Np = (int)(p / 2 * N) - 1:
//   - indices 0..Np rise along a half cosine,
//   - the last Np + 1 indices fall along the mirrored half cosine,
//   - everything in between evaluates to 1.0 (n is clamped to Np, cos(pi) = -1).
// NOTE(review): windowOffset is accepted but never read here; confirm the
// host does not expect the kernel to apply it.
__kernel void clWindowTukey(__global float* window, int windowOffset, float p)
{
    // Work in signed int throughout: mixing int with the size_t returned by
    // get_global_id()/get_global_size() makes the select()/max() calls
    // ambiguous and lets the tail index wrap around as an unsigned value.
    const int pos = (int)get_global_id(0);
    const int len = (int)get_global_size(0);
    const int Np = (int)(p / 2.0f * len) - 1;

    if (Np <= 0)
    {
        // No room for a taper; dividing by Np below would produce NaN (or a
        // bogus sign), so fall back to a rectangular window.
        window[pos] = 1.0f;
        return;
    }

    // Index into the falling taper; smaller than Np anywhere before the tail.
    const int tail = pos - (len - Np - 1) + Np;
    const int n = (pos <= Np) ? pos : max(Np, tail);

    // M_PI_F instead of M_PI keeps the math in single precision (no fp64 needed).
    window[pos] = 0.5f - 0.5f * cos(M_PI_F * n / Np);
}
|
||||||
|
|
||||||
__kernel void clChannelDecorr2(
|
__kernel void clChannelDecorr2(
|
||||||
__global int *samples,
|
__global int4 *samples,
|
||||||
__global short2 *src,
|
__global int4 *src,
|
||||||
int offset
|
int offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
int pos = get_global_id(0);
|
int pos = get_global_id(0);
|
||||||
if (pos < offset)
|
if (pos < offset)
|
||||||
{
|
{
|
||||||
short2 s = src[pos];
|
int4 s = src[pos];
|
||||||
samples[pos] = s.x;
|
int4 x = (s << 16) >> 16;
|
||||||
samples[1 * offset + pos] = s.y;
|
int4 y = s >> 16;
|
||||||
|
samples[pos] = x;
|
||||||
|
samples[1 * offset + pos] = y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,6 +155,8 @@ void clFindWastedBits(
|
|||||||
{
|
{
|
||||||
ptask[i].data.wbits = w;
|
ptask[i].data.wbits = w;
|
||||||
ptask[i].data.abits = a;
|
ptask[i].data.abits = a;
|
||||||
|
ptask[i].data.ignore = 0;//i != 0;
|
||||||
|
ptask[i].data.size = ptask[i].data.obits * ptask[i].data.blocksize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -413,6 +442,20 @@ inline int calc_residual(__global int *ptr, int * coefs, int ro)
|
|||||||
default: ENCODE_N(ro, action) \
|
default: ENCODE_N(ro, action) \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs one work-item per work-group. Each group owns `count` consecutive
// entries of `tasks`, starting at count * group id. For every entry the
// kernel sets data.ignore (0 for the first entry of the group, 1 for the
// rest) and resets data.size to data.obits * data.blocksize.
__kernel __attribute__((reqd_work_group_size(1, 1, 1)))
void clSelectStereoTasks(
    __global FLACCLSubframeTask *tasks,
    int count
)
{
    __global FLACCLSubframeTask *entry = tasks + count * get_group_id(0);
    for (int k = 0; k < count; k++, entry++)
    {
        // Only the first task of the group stays active.
        entry->data.ignore = (k != 0);
        entry->data.size = entry->data.obits * entry->data.blocksize;
    }
}
|
||||||
|
|
||||||
__kernel /*__attribute__(( vec_type_hint (int4)))*/ __attribute__((reqd_work_group_size(1, 1, 1)))
|
__kernel /*__attribute__(( vec_type_hint (int4)))*/ __attribute__((reqd_work_group_size(1, 1, 1)))
|
||||||
void clEstimateResidual(
|
void clEstimateResidual(
|
||||||
__global int*samples,
|
__global int*samples,
|
||||||
@@ -425,6 +468,12 @@ void clEstimateResidual(
|
|||||||
#define EPO 6
|
#define EPO 6
|
||||||
int len[1 << EPO]; // blocksize / 64!!!!
|
int len[1 << EPO]; // blocksize / 64!!!!
|
||||||
|
|
||||||
|
if (task.data.ignore)
|
||||||
|
{
|
||||||
|
tasks[get_group_id(0)].data.size = task.data.obits * bs;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
__global int *data = &samples[task.data.samplesOffs];
|
__global int *data = &samples[task.data.samplesOffs];
|
||||||
// for (int i = ro; i < 32; i++)
|
// for (int i = ro; i < 32; i++)
|
||||||
//task.coefs[i] = 0;
|
//task.coefs[i] = 0;
|
||||||
@@ -579,13 +628,15 @@ void clCalcPartition16(
|
|||||||
int max_porder // <= 8
|
int max_porder // <= 8
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
FLACCLSubframeTask task = tasks[get_group_id(0)];
|
FLACCLSubframeTask task = tasks[get_global_id(0)];
|
||||||
int bs = task.data.blocksize;
|
int bs = task.data.blocksize;
|
||||||
int ro = task.data.residualOrder;
|
int ro = task.data.residualOrder;
|
||||||
__global int *data = &samples[task.data.samplesOffs];
|
__global int *data = &samples[task.data.samplesOffs];
|
||||||
__global int *pl = partition_lengths + (1 << (max_porder + 1)) * get_group_id(0);
|
__global int *pl = partition_lengths + (1 << (max_porder + 1)) * get_global_id(0);
|
||||||
for (int p = 0; p < (1 << max_porder); p++)
|
for (int p = 0; p < (1 << max_porder); p++)
|
||||||
pl[p] = 0;
|
pl[p] = 0;
|
||||||
|
//__global int *rptr = residual + task.data.residualOffs;
|
||||||
|
//SWITCH_N((rptr[pos] = t, pl[pos >> 4] += (t << 1) ^ (t >> 31)));
|
||||||
SWITCH_N((residual[task.data.residualOffs + pos] = t, t = clamp(t, -0x7fffff, 0x7fffff), t = (t << 1) ^ (t >> 31), pl[pos >> 4] += t));
|
SWITCH_N((residual[task.data.residualOffs + pos] = t, t = clamp(t, -0x7fffff, 0x7fffff), t = (t << 1) ^ (t >> 31), pl[pos >> 4] += t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user