mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
Schur recursion for better precision
This commit is contained in:
@@ -105,10 +105,12 @@ extern "C" __global__ void cudaComputeLPC(
|
||||
{
|
||||
__shared__ struct {
|
||||
computeAutocorTaskStruct task;
|
||||
float tmp[32];
|
||||
float buf[32];
|
||||
float ldr[32];
|
||||
int bits[32];
|
||||
float autoc[33];
|
||||
float gen0[32];
|
||||
float gen1[32];
|
||||
float reff[32];
|
||||
int cbits;
|
||||
} shared;
|
||||
const int tid = threadIdx.x;
|
||||
@@ -129,44 +131,46 @@ extern "C" __global__ void cudaComputeLPC(
|
||||
shared.autoc[tid] += autoc[(blockIdx.y * partCount + part) * (max_order + 1) + tid];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
|
||||
if (tid < 32)
|
||||
shared.tmp[tid] = 0.0f;
|
||||
{
|
||||
shared.gen0[tid] = shared.autoc[tid+1];
|
||||
shared.gen1[tid] = shared.autoc[tid+1];
|
||||
shared.ldr[tid] = 0.0f;
|
||||
|
||||
float err = shared.autoc[0];
|
||||
|
||||
for(int order = 0; order < max_order; order++)
|
||||
{
|
||||
if (tid < 32)
|
||||
{
|
||||
shared.buf[tid] = (tid < order) * shared.tmp[tid] * shared.autoc[order - tid];
|
||||
shared.buf[tid] += shared.buf[tid + 16];
|
||||
shared.buf[tid] += shared.buf[tid + 8];
|
||||
shared.buf[tid] += shared.buf[tid + 4];
|
||||
shared.buf[tid] += shared.buf[tid + 2];
|
||||
shared.buf[tid] += shared.buf[tid + 1];
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
float r = (- shared.autoc[order+1] - shared.buf[0]) / err;
|
||||
|
||||
err *= 1.0f - (r * r);
|
||||
|
||||
shared.tmp[tid] += (tid < order) * r * shared.tmp[order - 1 - tid] + (tid == order) * r;
|
||||
|
||||
if (tid < 32)
|
||||
__syncthreads();
|
||||
float error = shared.autoc[0];
|
||||
for (int order = 0; order < max_order; order++)
|
||||
{
|
||||
// Schur recursion
|
||||
float reff = -shared.gen1[0] / error;
|
||||
if (tid == 0) shared.reff[order] = reff;
|
||||
error += shared.gen1[0] * reff;
|
||||
if (tid < max_order - order - 1)
|
||||
{
|
||||
float g1 = shared.gen1[tid + 1] + shared.reff[order] * shared.gen0[tid];
|
||||
float g0 = shared.gen1[tid + 1] * shared.reff[order] + shared.gen0[tid];
|
||||
shared.gen1[tid] = g1;
|
||||
shared.gen0[tid] = g0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Levinson-Durbin recursion
|
||||
shared.ldr[tid] += (tid < order) * reff * shared.ldr[order - 1 - tid] + (tid == order) * reff;
|
||||
|
||||
// Quantization
|
||||
int precision = 13;
|
||||
int taskNo = shared.task.residualOffs + order;
|
||||
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.tmp[tid]) * (1 << 15))) - precision), tid <= order);
|
||||
int taskNo = shared.task.residualOffs + order;
|
||||
shared.bits[tid] = __mul24((33 - __clz(__float2int_rn(fabs(shared.ldr[tid]) * (1 << 15))) - precision), tid <= order);
|
||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 16]);
|
||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 8]);
|
||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 4]);
|
||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 2]);
|
||||
shared.bits[tid] = max(shared.bits[tid], shared.bits[tid + 1]);
|
||||
int sh = max(0,min(15, 15 - shared.bits[0]));
|
||||
|
||||
// reverse coefs
|
||||
int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(-shared.tmp[order - tid] * (1 << sh))));
|
||||
int coef = max(-(1 << precision),min((1 << precision)-1,__float2int_rn(-shared.ldr[order - tid] * (1 << sh))));
|
||||
if (tid <= order)
|
||||
output[taskNo].coefs[tid] = coef;
|
||||
if (tid == 0)
|
||||
@@ -180,8 +184,7 @@ extern "C" __global__ void cudaComputeLPC(
|
||||
int cbits = shared.bits[0];
|
||||
if (tid == 0)
|
||||
output[taskNo].cbits = cbits;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -278,118 +278,117 @@ code {
|
||||
code {
|
||||
name = cudaComputeLPC
|
||||
lmem = 0
|
||||
smem = 572
|
||||
reg = 9
|
||||
smem = 828
|
||||
reg = 10
|
||||
bar = 1
|
||||
const {
|
||||
segname = const
|
||||
segnum = 1
|
||||
offset = 0
|
||||
bytes = 48
|
||||
bytes = 44
|
||||
mem {
|
||||
0x00000003 0x0000001f 0x7e800000 0x3f800000
|
||||
0x00000001 0x0000000f 0x00001fff 0xffffe000
|
||||
0x3e800000 0x00000020 0x0000009e 0x00000008
|
||||
0x00000003 0x0000001f 0x7e800000 0x00000001
|
||||
0x0000000f 0x00001fff 0xffffe000 0x3e800000
|
||||
0x00000020 0x0000009e 0x00000008
|
||||
}
|
||||
}
|
||||
bincode {
|
||||
0xa0000009 0x04000780 0x308005fd 0x644107c8
|
||||
0xa000000d 0x04000780 0x308007fd 0x644107c8
|
||||
0xa000b003 0x00000000 0x1000b003 0x00000280
|
||||
0xa0004e01 0x04200780 0x30040001 0xc4100780
|
||||
0x30020405 0xc4100780 0x2100ec00 0x20008200
|
||||
0xd00e0001 0x80c00780 0x00020405 0xc0000780
|
||||
0x04001201 0xe4200780 0x3002ce01 0x6c2187d2
|
||||
0xa00001fd 0x0c0147c8 0x00020405 0xc0001680
|
||||
0x0400da01 0xe43f1680 0x861ffe03 0x00000000
|
||||
0x30020605 0xc4100780 0x2100ec00 0x20008200
|
||||
0xd00e0001 0x80c00780 0x00020605 0xc0000780
|
||||
0x04001201 0xe4200780 0x3003ce01 0x6c2187d2
|
||||
0xa00001fd 0x0c0147c8 0x00020605 0xc0001680
|
||||
0x04009a01 0xe43f1680 0x861ffe03 0x00000000
|
||||
0x307cd1fd 0x6c20c7d8 0x1002b003 0x00001280
|
||||
0x1000f805 0x0403c780 0xa0027003 0x00000000
|
||||
0x10027003 0x00000100 0x1000d001 0x0423c780
|
||||
0x40014e0d 0x00200780 0x3010060d 0xc4100780
|
||||
0x60004e0d 0x0020c780 0x2101ee01 0x00000003
|
||||
0x2000060d 0x04004780 0x40010c11 0x00000780
|
||||
0x60000e11 0x00010780 0x30100811 0xc4100780
|
||||
0x60000c01 0x00010780 0x20000401 0x04000780
|
||||
0x30020001 0xc4100780 0x00020405 0xc0000780
|
||||
0x40014e09 0x00200780 0x30100409 0xc4100780
|
||||
0x60004e09 0x00208780 0x2101ee01 0x00000003
|
||||
0x20000409 0x04004780 0x40010811 0x00000780
|
||||
0x60000a11 0x00010780 0x30100811 0xc4100780
|
||||
0x60000801 0x00010780 0x20000601 0x04000780
|
||||
0x30020001 0xc4100780 0x00020605 0xc0000780
|
||||
0x2000ca01 0x04200780 0xd00e0001 0x80c00780
|
||||
0xd4036809 0x20000780 0xb800c001 0x00200780
|
||||
0x0400da01 0xe4200780 0xf0000001 0xe0000002
|
||||
0xd4026809 0x20000780 0xb800c001 0x00200780
|
||||
0x04009a01 0xe4200780 0xf0000001 0xe0000002
|
||||
0x20018205 0x00000003 0x3001d1fd 0x6c2147d8
|
||||
0x10013003 0x00001280 0x861ffe03 0x00000000
|
||||
0x30810401 0x6c40c7d0 0xa00001fd 0x0c0147c8
|
||||
0x00020405 0xc0001680 0x04001a01 0xe43f1680
|
||||
0xd0036805 0x20000780 0x307ccffd 0x6c20c7d8
|
||||
0x1400c001 0x0423c780 0x30000003 0x00001280
|
||||
0x10248005 0x00000003 0x00000205 0xc0000780
|
||||
0x3002040d 0xc4100780 0x1000f811 0x0403c780
|
||||
0x20400805 0x04008780 0x00020209 0xc0000780
|
||||
0xa004e003 0x00000000 0x300209fd 0x6c00c7d8
|
||||
0x1004e003 0x00000100 0xd8036811 0x20000780
|
||||
0x0000060d 0xc0000780 0x1000c005 0x0423c784
|
||||
0xcc01da05 0x00200780 0x1000f805 0x0403d280
|
||||
0xdc017011 0x20000780 0x0c005a01 0xe4204780
|
||||
0xb000de05 0x00204784 0x0c005a01 0xe4204780
|
||||
0xb000ce05 0x00204784 0x0c005a01 0xe4204780
|
||||
0xb000c605 0x00204784 0x0c005a01 0xe4204780
|
||||
0xb000c205 0x00204784 0x0c005a01 0xe4204780
|
||||
0xb000c005 0x00204784 0x0c005a01 0xe4204780
|
||||
0x308107fd 0x6c4107c8 0x30000003 0x00000280
|
||||
0x00020605 0xc0000780 0xd4027009 0x20000780
|
||||
0x1900e000 0x1900e004 0x0400dc01 0xe4200780
|
||||
0x04011c01 0xe4204780 0x30020611 0xc4100780
|
||||
0x04001a01 0xe43f0780 0x861ffe03 0x00000000
|
||||
0xd0026805 0x20000780 0x307ccffd 0x6c20c7c8
|
||||
0x1400c001 0x0423c780 0x30000003 0x00000280
|
||||
0x307c07fd 0x6c0087c8 0x213fee15 0x0fffffff
|
||||
0x1000f819 0x0403c780 0xd0047005 0x20000780
|
||||
0xb08201fd 0x605107d8 0x10008008 0x1500e004
|
||||
0xa400c01d 0xe4204780 0xc0870e1d 0x00401680
|
||||
0xc0870409 0x00401680 0x90000408 0xc0020e1c
|
||||
0x00020c05 0xc0000680 0xd0047009 0x20000780
|
||||
0x04015c01 0xe421c680 0x1800c005 0x0423c680
|
||||
0xe0010e01 0x00000780 0x20400a05 0x04018780
|
||||
0x300107fd 0x6c0187d8 0xa0058003 0x00000000
|
||||
0x10058003 0x00001280 0x00000805 0xc0000780
|
||||
0x00020c11 0xc0000780 0xd404780d 0x20000780
|
||||
0xd4037009 0x20000780 0xd0057011 0x20000784
|
||||
0x1d00e004 0x1900e008 0xe001c021 0x00208784
|
||||
0x1900e004 0x1d00e008 0xe001c005 0x00208784
|
||||
0x04011c01 0xe4204780 0x0400dc01 0xe4220780
|
||||
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||
0xd0016811 0x20000780 0xd403280d 0x20000780
|
||||
0x1000c005 0x0423c784 0xbc00c015 0x00204780
|
||||
0xb08201fd 0x605107e8 0x10000005 0x0403c780
|
||||
0xa0000a15 0xe4004780 0xc0880a15 0x00402680
|
||||
0xc0880205 0x00402680 0x90000204 0xc0010a18
|
||||
0xc806d815 0x00200780 0x1000f815 0x0403d280
|
||||
0x300209fd 0x6c0147d8 0xb0000c05 0x00014780
|
||||
0x10000a05 0x0403d280 0x0000060d 0xc0000780
|
||||
0xe1060c15 0x0440c780 0xbd017a04 0xc0050000
|
||||
0xa00bb003 0x00000000 0x0c001a01 0xe4204780
|
||||
0x100bb003 0x00000100 0xa0000205 0xc4104780
|
||||
0xc0000205 0x04700003 0xa0000215 0x8c0047d0
|
||||
0x2000d605 0x04210780 0xa0000a15 0x44065680
|
||||
0x30170a15 0xec101680 0x31000a15 0x04429680
|
||||
0x10001215 0x2440d100 0x30020819 0x6c0187d0
|
||||
0x30148a15 0x00000003 0xd0840c19 0x04400780
|
||||
0x0000060d 0xc0000780 0x40060a15 0x00018780
|
||||
0xdc027011 0x20000780 0x0c009a01 0xe4214780
|
||||
0x3005de15 0x8c200784 0x0c009a01 0xe4214780
|
||||
0x3005ce15 0x8c200784 0x0c009a01 0xe4214780
|
||||
0x3005c615 0x8c200784 0x0c009a01 0xe4214780
|
||||
0x3005c215 0x8c200784 0x0c009a01 0xe4214780
|
||||
0x3005c015 0x8c200784 0x0c009a01 0xe4214780
|
||||
0xd002680d 0x20000780 0x3d0fe015 0x00000003
|
||||
0x30850a15 0xac400780 0x10018019 0x00000003
|
||||
0x307c0a15 0x8c000780 0x30050c19 0xc4000780
|
||||
0xa0000c19 0x44014780 0xc806da19 0x00200780
|
||||
0xa0000c19 0xac004780 0x30860c19 0xac400780
|
||||
0xa0091003 0x00000000 0x30870c19 0x8c400780
|
||||
0x10091003 0x00001100 0x3007021d 0xc4100780
|
||||
0x30060221 0xc4100780 0x20088e1c 0x2107e81c
|
||||
0x2000061d 0x0401c780 0x20008e1d 0x00000007
|
||||
0xd00e0e19 0xa0c00780 0x307c041d 0x6c0087e2
|
||||
0xa0000ffd 0x0c0147d8 0x3007021d 0xc4102680
|
||||
0x30060221 0xc4102680 0x20000e1d 0x04022680
|
||||
0x2000c81d 0x0421e680 0x21000e1d 0x0442e680
|
||||
0xd00e0e15 0xa0c02680 0x307c0dfd 0x6c0087e8
|
||||
0xa0000c15 0x44066500 0x30170a15 0xec102500
|
||||
0x31000a15 0x0442a500 0x10001215 0x2440e280
|
||||
0xd0060019 0x0402c780 0x307c0dfd 0x6c0087e8
|
||||
0xa0000c19 0x44066500 0x30170c19 0xec102500
|
||||
0x31000c19 0x0442a500 0x10001219 0x2440e280
|
||||
0x30060a15 0x8c000780 0x00000609 0xc0000780
|
||||
0x30218a15 0x00000003 0xd802700d 0x20000780
|
||||
0x08009a01 0xe4214780 0x3c05de15 0x8c200780
|
||||
0x08009a01 0xe4214780 0x3c05ce15 0x8c200780
|
||||
0x08009a01 0xe4214780 0x3c05c615 0x8c200780
|
||||
0x08009a01 0xe4214780 0x3c05c215 0x8c200780
|
||||
0x08009a01 0xe4214780 0x3c05c015 0x8c200780
|
||||
0x08009a01 0xe4214780 0x100bb003 0x00001100
|
||||
0x30070215 0xc4100780 0x30060205 0xc4100780
|
||||
0x20000a05 0x04004780 0xd0026809 0x20000780
|
||||
0x2101e814 0x1900e004 0x200c8a15 0x00000003
|
||||
0xd00e0a05 0xa0c00780 0xf0000001 0xe0000002
|
||||
0x861ffe03 0x00000000 0x20018811 0x00000003
|
||||
0x3004cffd 0x6c2147d8 0xd4000805 0x20000780
|
||||
0x10038003 0x00001280 0xf0000001 0xe0000001
|
||||
0x20400c05 0x0400c780 0x00020205 0xc0000780
|
||||
0x30030dfd 0x6c00c7d8 0xc407d809 0x00200780
|
||||
0x1000f809 0x0403d280 0x30030dfd 0x6c0147d8
|
||||
0xb0000405 0x0001c780 0x10000405 0x0403d280
|
||||
0x00000809 0xc0000780 0xb800da05 0x00204780
|
||||
0xa0000209 0xc4104780 0xc0000409 0x04700003
|
||||
0x08001a01 0xe4204780 0xa0000409 0x8c0047d0
|
||||
0x2000d605 0x04218780 0xa0000409 0x44065680
|
||||
0x30170409 0xec101680 0x31000409 0x04425680
|
||||
0x10001009 0x2440d100 0x30030c1d 0x6c0187d0
|
||||
0x30148409 0x00000003 0xd0830e1d 0x04400780
|
||||
0x40070409 0x00018780 0x00000809 0xc0000780
|
||||
0xd801700d 0x20000780 0x08005a01 0xe4208780
|
||||
0x3c02de09 0x8c200780 0x08005a01 0xe4208780
|
||||
0x3c02ce09 0x8c200780 0x08005a01 0xe4208780
|
||||
0x3c02c609 0x8c200780 0x08005a01 0xe4208780
|
||||
0x3c02c209 0x8c200780 0x08005a01 0xe4208780
|
||||
0x3c02c009 0x8c200780 0x08005a01 0xe4208780
|
||||
0xd0016809 0x20000780 0x390fe009 0x00000003
|
||||
0x30840409 0xac400780 0x1001801d 0x00000003
|
||||
0x307c0409 0x8c000780 0x30020e1d 0xc4000780
|
||||
0xa0000e1d 0x44014780 0xc407da1d 0x00200780
|
||||
0xa0000e1d 0xac004780 0x30850e1d 0xac400780
|
||||
0xa0091003 0x00000000 0x30860e1d 0x8c400780
|
||||
0x10091003 0x00001100 0x30070221 0xc4100780
|
||||
0x30060225 0xc4100780 0x20099020 0x2108e820
|
||||
0x20000821 0x04020780 0x20009021 0x00000007
|
||||
0xd00e101d 0xa0c00780 0xf0000001 0xe0000002
|
||||
0x30070221 0xc4100680 0x30060225 0xc4100680
|
||||
0x20001021 0x04024680 0x2000c821 0x04220680
|
||||
0x21001021 0x04428680 0xd00e1009 0xa0c00680
|
||||
0x307c0ffd 0x6c0087d8 0xa0000e09 0x44065500
|
||||
0x30170409 0xec101500 0x31000409 0x04425500
|
||||
0x10001009 0x2440d280 0xd007001d 0x0402c780
|
||||
0x307c0ffd 0x6c0087d8 0xa0000e1d 0x44065500
|
||||
0x30170e1d 0xec101500 0x31000e1d 0x04425500
|
||||
0x1000101d 0x2440d280 0x30070409 0x8c000780
|
||||
0x00000805 0xc0000780 0x30218409 0x00000003
|
||||
0xd4017009 0x20000780 0x04005a01 0xe4208780
|
||||
0x3802de09 0x8c200780 0x04005a01 0xe4208780
|
||||
0x3802ce09 0x8c200780 0x04005a01 0xe4208780
|
||||
0x3802c609 0x8c200780 0x04005a01 0xe4208780
|
||||
0x3802c209 0x8c200780 0x04005a01 0xe4208780
|
||||
0x3802c009 0x8c200780 0x04005a01 0xe4208780
|
||||
0xa00bb003 0x00000000 0x100bb003 0x00000100
|
||||
0x30070209 0xc4100780 0x30060205 0xc4100780
|
||||
0x20000405 0x04004780 0xd0016805 0x20000780
|
||||
0x2101e808 0x1500e004 0x200c8409 0x00000003
|
||||
0xd00e0405 0xa0c00780 0xf0000001 0xe0000002
|
||||
0x20018c19 0x00000003 0x3006cffd 0x6c2147d8
|
||||
0x1003d003 0x00001280 0xf0000001 0xe0000001
|
||||
}
|
||||
}
|
||||
code {
|
||||
|
||||
Reference in New Issue
Block a user