mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
optimizations
This commit is contained in:
@@ -474,6 +474,13 @@ void clEstimateResidual(
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#ifdef AMD
|
||||
float4 fc0 = vload4(0, &fcoef[0]);
|
||||
float4 fc1 = vload4(1, &fcoef[0]);
|
||||
#if MAX_ORDER > 8
|
||||
float4 fc2 = vload4(2, &fcoef[0]);
|
||||
#endif
|
||||
#endif
|
||||
for (int pos = 0; pos < bs; pos += GROUP_SIZE)
|
||||
{
|
||||
// fetch samples
|
||||
@@ -484,10 +491,20 @@ void clEstimateResidual(
|
||||
|
||||
// compute residual
|
||||
__local float* dptr = &data[tid + GROUP_SIZE - ro];
|
||||
float4 sum = vload4(0, &fcoef[0]) * vload4(0, dptr)
|
||||
float4 sum
|
||||
#ifdef AMD
|
||||
= fc0 * vload4(0, dptr)
|
||||
+ fc1 * vload4(1, dptr)
|
||||
#else
|
||||
= vload4(0, &fcoef[0]) * vload4(0, dptr)
|
||||
+ vload4(1, &fcoef[0]) * vload4(1, dptr)
|
||||
#endif
|
||||
#if MAX_ORDER > 8
|
||||
#ifdef AMD
|
||||
+ fc2 * vload4(2, dptr)
|
||||
#else
|
||||
+ vload4(2, &fcoef[0]) * vload4(2, dptr)
|
||||
#endif
|
||||
#if MAX_ORDER > 12
|
||||
+ vload4(3, &fcoef[0]) * vload4(3, dptr)
|
||||
#if MAX_ORDER > 16
|
||||
@@ -502,15 +519,20 @@ void clEstimateResidual(
|
||||
|
||||
int t = convert_int_rte(nextData + sum.x + sum.y + sum.z + sum.w);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#ifdef AMD
|
||||
data[tid] = nextData;
|
||||
// ensure we're within frame bounds
|
||||
t = select(0, t, offs >= ro && offs < bs);
|
||||
// overflow protection
|
||||
t = iclamp(t, -0x7fffff, 0x7fffff);
|
||||
// convert to unsigned
|
||||
#ifdef AMD
|
||||
data[tid] = nextData;
|
||||
atom_add(&psum[min(63,offs >> partOrder)], (t << 1) ^ (t >> 31));
|
||||
#else
|
||||
// ensure we're within frame bounds
|
||||
t = select(0, t, offs >= ro && offs < bs);
|
||||
// overflow protection
|
||||
t = iclamp(t, -0x7fffff, 0x7fffff);
|
||||
// convert to unsigned
|
||||
data[tid] = (t << 1) ^ (t >> 31);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int ps = (1 << partOrder) - 1;
|
||||
|
||||
Reference in New Issue
Block a user