Files
cuetools.net/CUETools.CLParity/parity.cl

200 lines
6.2 KiB
Common Lisp
Raw Normal View History

/**
* CUETools.CLParity: Reed-Solomon (32 bit) using OpenCL
Bump copyright year to 2020 The copyright year was last time updated in 2018. There is some cleanup involved in this commit and the next copyright year update is supposed to be simpler (i.e. substitute "-2020"). - Substitute occurrences of "-2018" with "-2020" using: git grep -I -l -e '-2018' -- ':(exclude)*.bak' | xargs \ sed -b -i -e 's/-2018/-2020/g' - Update special cases: CUEPlayer git grep -I -l -e 'Grigory Chudov 2010' -- | xargs \ sed -b -i -e 's/Grigory Chudov 2010/2010-2020 Grigory Chudov/g' CUERipper git grep -I -l -e '2008-2009' -- | xargs \ sed -b -i -e 's/2008-2009/2008-2020/g' CUETools, CUETools.FLACCL.cmd git grep -I -l -e '2008-2010' -- ':(exclude)*FlaCuda*' | xargs \ sed -b -i -e 's/2008-2010/2008-2020/g' git grep -I -l -e '2010-2013' -- | xargs \ sed -b -i -e 's/2010-2013/2010-2020/g' CUETools.ChaptersToCue git grep -I -l -e 'Grigory Chudov 2017' -- | xargs \ sed -b -i -e 's/Grigory Chudov 2017/2017-2020 Grigory Chudov/g' CUETools.CTDB.EACPlugin git grep -I -l -e 'Grigory Chudov 2012' -- | xargs \ sed -b -i -e 's/Grigory Chudov 2012/2012-2020 Grigory Chudov/g' git grep -I -l -e '2011-12' -- | xargs \ sed -b -i -e 's/2011-12/2011-2020/g' CUETools.Codecs.FLACCL git grep -I -l -e '2009-2010' -- ':(exclude)*FlaCuda*' | xargs \ sed -b -i -e 's/2009-2010/2009-2020/g' CUETools.eac3ui (BluTools) git grep -I -l -e '© 2018' -- | xargs \ sed -b -i -e 's/© 2018/© 2018-2020 Grigory Chudov/g' CUETools.Flake git grep -I -l -e ' 2009-2014 Gr' -- | xargs \ sed -b -i -e 's/ 2009-2014 Gr/ 2009-2020 Gr/g' CUETools.Processor git grep -I -l -e ' 2008-2013 Gr' -- | xargs \ sed -b -i -e 's/ 2008-2013 Gr/ 2008-2020 Gr/g' CUETools.Ripper.Console git grep -I -l -e ' 2008-10 Gr' -- | xargs \ sed -b -i -e 's/ 2008-10 Gr/ 2008-2020 Gr/g' CUETools.Ripper.Console, CUETools.Ripper.SCSI git grep -I -l -e ' 2008-13 Gr' -- | xargs \ sed -b -i -e 's/ 2008-13 Gr/ 2008-2020 Gr/g' Single year entries: 2008, 2009, 2010, 2011, 2017, 2018 git grep -I -l -e ' 2008 Gr' -- | xargs \ sed -b -i -e 's/ 2008 Gr/ 2008-2020 Gr/g' git grep -I -l -e ' 2009 Gr' -- ':(exclude)*FlaCuda*' | xargs \ sed -b -i -e 's/ 2009 Gr/ 2009-2020 Gr/g' git grep -I -l -e ' 2010 Gr' -- | xargs \ sed -b -i -e 's/ 2010 Gr/ 2010-2020 Gr/g' git grep -I -l -e ' 2011 Gr' -- | xargs \ sed -b -i -e 's/ 2011 Gr/ 2011-2020 Gr/g' git grep -I -l -e ' 2017 Gr' -- | xargs \ sed -b -i -e 's/ 2017 Gr/ 2017-2020 Gr/g' git grep -I -l -e ' 2018 Gr' -- | xargs \ sed -b -i -e 's/ 2018 Gr/ 2018-2020 Gr/g' Fix typo in copyright year of CUETools.Codecs.WMA/AudioDecoder.cs: Copyright (c) 20139 Grigory Chudov git grep -I -lw -e '20139' -- | xargs \ sed -b -i -e 's/20139/2013-2020/g'
2020-01-30 18:13:46 +01:00
* Copyright (c) 2010-2020 Gregory S. Chudov
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define WARP_SIZE 32
int
galois_mul(int x, int y)
{
const int poly = 00020000007;
int prod = 0;
//#pragma unroll 0
for (int i = 0; i < 4; i++)
{
if (x & 1) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 2) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 4) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 8) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 16) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 32) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 64) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
if (x & 128) prod ^= y;
y = (y << 1) ^ ((y >> 31) ? poly : 0);
x >>= 8;
}
return prod;
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonInit(__global int *encodeGx0, __global int *encodeGx1, __global int *parity0, __global int *parity1, int npar)
{
int i = get_group_id(0) * GROUP_SIZE + get_local_id(0);
parity0[i] = parity1[i] = 0;
encodeGx0[i] = encodeGx1[i] = (i == npar - 1 ? 1 : 0);
if (i < GROUP_SIZE) encodeGx0[npar + i] = encodeGx1[npar + i] = 0;
if (i == 0) parity0[npar] = parity1[npar] = 0;
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonInitGx(__global int *exp, __global int *old_encodeGx, __global int *new_encodeGx, int npar, int step)
{
int tid = get_local_id(0);
int i = get_group_id(0) * (GROUP_SIZE / 2) + tid;
__local int gx[GROUP_SIZE];
__local int ex[GROUP_SIZE / 2];
gx[tid] = old_encodeGx[i];
if (tid < GROUP_SIZE / 2) ex[tid] = exp[step * (GROUP_SIZE / 2) + tid];
barrier(CLK_LOCAL_MEM_FENCE);
for (int s = 0; s < GROUP_SIZE / 2; s++)
{
int p = tid + s < (GROUP_SIZE - 1) ? galois_mul(gx[tid], ex[s]) ^ gx[tid + 1] : 0;
barrier(CLK_LOCAL_MEM_FENCE);
gx[tid] = p;
barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < GROUP_SIZE / 2) new_encodeGx[i] = gx[tid];
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonA(__global int* data, __global int*encodeGx, __global int*old_parity, int npar, int offset)
{
int tid = get_local_id(0);
__local int ib[GROUP_SIZE];
__local int gx[GROUP_SIZE];
ib[tid] = data[offset + tid] ^ old_parity[tid];
gx[tid] = encodeGx[tid];
barrier(CLK_LOCAL_MEM_FENCE);
for (int s = 0; s < GROUP_SIZE; s++)
{
if (tid > s)
ib[tid] ^= galois_mul(ib[s], gx[tid - 1 - s]);
barrier(CLK_LOCAL_MEM_FENCE);
}
data[offset + tid] = ib[tid];
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonB(__global int* data, __global int* encodeGx, __global int* parity, int npar, int offset)
{
__local int ib[GROUP_SIZE];
__local int gx[GROUP_SIZE * 2];
int tid = get_local_id(0);
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
ib[tid] = data[offset + tid];
gx[tid] = encodeGx[i];
gx[tid + GROUP_SIZE] = i + GROUP_SIZE >= npar ? 0 : encodeGx[i + GROUP_SIZE];
barrier(CLK_LOCAL_MEM_FENCE);
int p = i + GROUP_SIZE >= npar ? 0 : parity[i + GROUP_SIZE];
for (int s = 0; s < GROUP_SIZE; s++)
p ^= galois_mul(ib[s], gx[tid + GROUP_SIZE - 1 - s]);
parity[i] = p;
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomon(__global int*data, __global int*encodeGx, __global int*old_parity, __global int *new_parity, int npar, int offset)
{
int tid = get_local_id(0);
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
__local int old[GROUP_SIZE];
__local int ib;
if (tid == 0)
ib = old_parity[0] ^ data[offset];
if (tid == 0)
old[GROUP_SIZE - 1] = old_parity[i + GROUP_SIZE];
else
old[tid - 1] = old_parity[i];
barrier(CLK_LOCAL_MEM_FENCE);
new_parity[i] = old[tid] ^ galois_mul(ib, encodeGx[i]);
//new_parity[i] = old_parity[i + 1] ^ galois_mul(ib, encodeGx[i]);
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonDecodeInit(__global int* data, int dataLen, __global int*expTbl, __global int*syn, int npar)
{
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
syn[i] = 0;
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void reedSolomonDecode(__global int* data, int dataLen, __global int*expTbl, __global int*syn, int npar)
{
int tid = get_local_id(0);
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
__local int ds[GROUP_SIZE];
ds[tid] = data[tid];
barrier(CLK_LOCAL_MEM_FENCE);
int wk = syn[i];
int ai = expTbl[i];
for (int s = 0; s < GROUP_SIZE; s++)
wk = ds[s] ^ galois_mul(wk, ai);
syn[i] = wk;
}
int galois_exp(int t, int n)
{
int e = 1;
for (int i = 0; i < 32; i++)
{
if ((n >> i) & 1) e = galois_mul(e, t);
t = galois_mul(t, t);
}
return e;
}
__kernel __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
void chienSearch(__global int *wk_out, int pos, __global int *sigma, int jisu, int n)
{
const int poly = 00020000007;
const int oneByTwo = (poly >> 1) | (1 << 31);
int tid = get_local_id(0);
int i = (get_group_id(0) * GROUP_SIZE) + get_local_id(0);
int zexp = galois_exp(oneByTwo, i + pos);
int wk = 1;
int jzexp = 1;
__local int sj;
for (int j = 1; j <= jisu; j++)
{
jzexp = galois_mul(jzexp, zexp);
if (tid == 0) sj = sigma[j];
barrier(CLK_LOCAL_MEM_FENCE);
wk ^= galois_mul(sj, jzexp); // wk ^= sigma[j] / 2^(ij);
barrier(CLK_LOCAL_MEM_FENCE);
}
wk_out[i] = wk;
}