mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
200 lines
6.2 KiB
Common Lisp
200 lines
6.2 KiB
Common Lisp
/**
|
|
* CUETools.CLParity: Reed-Solomon (32 bit) using OpenCL
|
|
* Copyright (c) 2010 Gregory S. Chudov
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#define WARP_SIZE 32
|
|
|
|
int
|
|
galois_mul(int x, int y)
|
|
{
|
|
const int poly = 00020000007;
|
|
int prod = 0;
|
|
|
|
//#pragma unroll 0
|
|
for (int i = 0; i < 4; i++)
|
|
{
|
|
if (x & 1) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 2) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 4) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 8) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 16) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 32) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 64) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
if (x & 128) prod ^= y;
|
|
y = (y << 1) ^ ((y >> 31) ? poly : 0);
|
|
x >>= 8;
|
|
}
|
|
return prod;
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonInit(__global int *encodeGx0, __global int *encodeGx1, __global int *parity0, __global int *parity1, int npar)
|
|
{
|
|
int i = get_group_id(0) * GROUP_SIZE + get_local_id(0);
|
|
parity0[i] = parity1[i] = 0;
|
|
encodeGx0[i] = encodeGx1[i] = (i == npar - 1 ? 1 : 0);
|
|
if (i < GROUP_SIZE) encodeGx0[npar + i] = encodeGx1[npar + i] = 0;
|
|
if (i == 0) parity0[npar] = parity1[npar] = 0;
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonInitGx(__global int *exp, __global int *old_encodeGx, __global int *new_encodeGx, int npar, int step)
|
|
{
|
|
int tid = get_local_id(0);
|
|
int i = get_group_id(0) * (GROUP_SIZE / 2) + tid;
|
|
__local int gx[GROUP_SIZE];
|
|
__local int ex[GROUP_SIZE / 2];
|
|
|
|
gx[tid] = old_encodeGx[i];
|
|
if (tid < GROUP_SIZE / 2) ex[tid] = exp[step * (GROUP_SIZE / 2) + tid];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
for (int s = 0; s < GROUP_SIZE / 2; s++)
|
|
{
|
|
int p = tid + s < (GROUP_SIZE - 1) ? galois_mul(gx[tid], ex[s]) ^ gx[tid + 1] : 0;
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
gx[tid] = p;
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
if (tid < GROUP_SIZE / 2) new_encodeGx[i] = gx[tid];
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonA(__global int* data, __global int*encodeGx, __global int*old_parity, int npar, int offset)
|
|
{
|
|
int tid = get_local_id(0);
|
|
__local int ib[GROUP_SIZE];
|
|
__local int gx[GROUP_SIZE];
|
|
|
|
ib[tid] = data[offset + tid] ^ old_parity[tid];
|
|
gx[tid] = encodeGx[tid];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
for (int s = 0; s < GROUP_SIZE; s++)
|
|
{
|
|
if (tid > s)
|
|
ib[tid] ^= galois_mul(ib[s], gx[tid - 1 - s]);
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
}
|
|
|
|
data[offset + tid] = ib[tid];
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonB(__global int* data, __global int* encodeGx, __global int* parity, int npar, int offset)
|
|
{
|
|
__local int ib[GROUP_SIZE];
|
|
__local int gx[GROUP_SIZE * 2];
|
|
int tid = get_local_id(0);
|
|
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
|
|
ib[tid] = data[offset + tid];
|
|
gx[tid] = encodeGx[i];
|
|
gx[tid + GROUP_SIZE] = i + GROUP_SIZE >= npar ? 0 : encodeGx[i + GROUP_SIZE];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
int p = i + GROUP_SIZE >= npar ? 0 : parity[i + GROUP_SIZE];
|
|
for (int s = 0; s < GROUP_SIZE; s++)
|
|
p ^= galois_mul(ib[s], gx[tid + GROUP_SIZE - 1 - s]);
|
|
parity[i] = p;
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomon(__global int*data, __global int*encodeGx, __global int*old_parity, __global int *new_parity, int npar, int offset)
|
|
{
|
|
int tid = get_local_id(0);
|
|
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
|
|
__local int old[GROUP_SIZE];
|
|
__local int ib;
|
|
if (tid == 0)
|
|
ib = old_parity[0] ^ data[offset];
|
|
if (tid == 0)
|
|
old[GROUP_SIZE - 1] = old_parity[i + GROUP_SIZE];
|
|
else
|
|
old[tid - 1] = old_parity[i];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
new_parity[i] = old[tid] ^ galois_mul(ib, encodeGx[i]);
|
|
//new_parity[i] = old_parity[i + 1] ^ galois_mul(ib, encodeGx[i]);
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonDecodeInit(__global int* data, int dataLen, __global int*expTbl, __global int*syn, int npar)
|
|
{
|
|
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
|
|
syn[i] = 0;
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void reedSolomonDecode(__global int* data, int dataLen, __global int*expTbl, __global int*syn, int npar)
|
|
{
|
|
int tid = get_local_id(0);
|
|
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
|
|
__local int ds[GROUP_SIZE];
|
|
|
|
ds[tid] = data[tid];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
int wk = syn[i];
|
|
int ai = expTbl[i];
|
|
|
|
for (int s = 0; s < GROUP_SIZE; s++)
|
|
wk = ds[s] ^ galois_mul(wk, ai);
|
|
|
|
syn[i] = wk;
|
|
}
|
|
|
|
int galois_exp(int t, int n)
|
|
{
|
|
int e = 1;
|
|
for (int i = 0; i < 32; i++)
|
|
{
|
|
if ((n >> i) & 1) e = galois_mul(e, t);
|
|
t = galois_mul(t, t);
|
|
}
|
|
return e;
|
|
}
|
|
|
|
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
|
void chienSearch(__global int *wk_out, int pos, __global int *sigma, int jisu, int n)
|
|
{
|
|
const int poly = 00020000007;
|
|
const int oneByTwo = (poly >> 1) | (1 << 31);
|
|
int tid = get_local_id(0);
|
|
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
|
|
int zexp = galois_exp(oneByTwo, i + pos);
|
|
int wk = 1;
|
|
int jzexp = 1;
|
|
__local int sj;
|
|
|
|
for (int j = 1; j <= jisu; j++)
|
|
{
|
|
jzexp = galois_mul(jzexp, zexp);
|
|
if (tid == 0) sj = sigma[j];
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
wk ^= galois_mul(sj, jzexp); // wk ^= sigma[j] / 2^(ij);
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
}
|
|
wk_out[i] = wk;
|
|
}
|